mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-16 21:05:20 +02:00
chore: cleaned comments slop
This commit is contained in:
parent
f23be16b35
commit
f9b5367754
34 changed files with 274 additions and 232 deletions
|
|
@ -250,12 +250,12 @@ LANGSMITH_PROJECT=surfsense
|
|||
|
||||
|
||||
# =============================================================================
|
||||
# OPTIONAL: New-chat agent feature flags (OpenCode-port)
|
||||
# OPTIONAL: New-chat agent feature flags
|
||||
# =============================================================================
|
||||
# Master kill-switch — when true, every flag below is forced OFF.
|
||||
# SURFSENSE_DISABLE_NEW_AGENT_STACK=false
|
||||
|
||||
# Tier 1 — Agent quality
|
||||
# Agent quality
|
||||
# SURFSENSE_ENABLE_CONTEXT_EDITING=false
|
||||
# SURFSENSE_ENABLE_COMPACTION_V2=false
|
||||
# SURFSENSE_ENABLE_RETRY_AFTER=false
|
||||
|
|
@ -265,24 +265,24 @@ LANGSMITH_PROJECT=surfsense
|
|||
# SURFSENSE_ENABLE_TOOL_CALL_REPAIR=false
|
||||
# SURFSENSE_ENABLE_DOOM_LOOP=false # leave OFF until UI handles permission='doom_loop'
|
||||
|
||||
# Tier 2 — Safety
|
||||
# Safety
|
||||
# SURFSENSE_ENABLE_PERMISSION=false
|
||||
# SURFSENSE_ENABLE_BUSY_MUTEX=false
|
||||
# SURFSENSE_ENABLE_LLM_TOOL_SELECTOR=false # adds a per-turn LLM call
|
||||
|
||||
# Tier 3b — Observability (also requires OTEL_EXPORTER_OTLP_ENDPOINT)
|
||||
# Observability — OTel (also requires OTEL_EXPORTER_OTLP_ENDPOINT)
|
||||
# SURFSENSE_ENABLE_OTEL=false
|
||||
|
||||
# Tier 4 — Skills + subagents
|
||||
# Skills + subagents
|
||||
# SURFSENSE_ENABLE_SKILLS=false
|
||||
# SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
|
||||
# SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
|
||||
|
||||
# Tier 5 — Snapshot / revert
|
||||
# Snapshot / revert
|
||||
# SURFSENSE_ENABLE_ACTION_LOG=false
|
||||
# SURFSENSE_ENABLE_REVERT_ROUTE=false # Backend-only; flip when UI ships
|
||||
|
||||
# Tier 6 — Plugins
|
||||
# Plugins
|
||||
# SURFSENSE_ENABLE_PLUGIN_LOADER=false
|
||||
# Comma-separated allowlist of plugin entry-point names
|
||||
# SURFSENSE_ALLOWED_PLUGINS=year_substituter
|
||||
|
|
|
|||
|
|
@ -4,8 +4,10 @@ Revision ID: 130
|
|||
Revises: 129
|
||||
Create Date: 2026-04-28
|
||||
|
||||
Tier 5.2 in the OpenCode-port plan. Adds the append-only ``agent_action_log``
|
||||
table that :class:`ActionLogMiddleware` writes to after every tool call.
|
||||
Adds the append-only ``agent_action_log`` table that
|
||||
:class:`ActionLogMiddleware` writes to after every tool call. Each row
|
||||
optionally carries a ``reverse_descriptor`` payload used by
|
||||
``POST /api/threads/{thread_id}/revert/{action_id}`` to undo the action.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ Revision ID: 131
|
|||
Revises: 130
|
||||
Create Date: 2026-04-28
|
||||
|
||||
Tier 5.1 in the OpenCode-port plan. Adds two snapshot tables:
|
||||
Adds two snapshot tables that back the per-action revert flow:
|
||||
|
||||
* ``document_revisions``: pre-mutation snapshot of NOTE/FILE/EXTENSION docs.
|
||||
* ``folder_revisions``: pre-mutation snapshot of folder mkdir/move/delete.
|
||||
|
|
|
|||
|
|
@ -4,11 +4,10 @@ Revision ID: 132
|
|||
Revises: 131
|
||||
Create Date: 2026-04-28
|
||||
|
||||
Tier 2.1 in the OpenCode-port plan. Adds the persistent ``agent_permission_rules``
|
||||
table consumed by :class:`PermissionMiddleware` at agent build time. Rules
|
||||
can be scoped at search-space (``user_id`` / ``thread_id`` NULL),
|
||||
user-wide (``user_id`` set, ``thread_id`` NULL), or per-thread
|
||||
(``thread_id`` set).
|
||||
Adds the persistent ``agent_permission_rules`` table consumed by
|
||||
:class:`PermissionMiddleware` at agent build time. Rules can be scoped
|
||||
at search-space (``user_id`` / ``thread_id`` NULL), user-wide
|
||||
(``user_id`` set, ``thread_id`` NULL), or per-thread (``thread_id`` set).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
|
|||
|
|
@ -353,11 +353,12 @@ async def create_surfsense_deep_agent(
|
|||
additional_tools=list(additional_tools) if additional_tools else None,
|
||||
)
|
||||
|
||||
# Tier 1.6: register `invalid` tool. It is dispatched only when
|
||||
# ToolCallNameRepairMiddleware rewrites a malformed call. We
|
||||
# intentionally append it AFTER ``build_tools_async`` so it never
|
||||
# appears in the system-prompt tool list (which is built from the
|
||||
# registry, not the bound tool list).
|
||||
# Register the ``invalid`` tool only when tool-call repair is on. It
|
||||
# is dispatched only when :class:`ToolCallNameRepairMiddleware`
|
||||
# rewrites a malformed call. We intentionally append it AFTER
|
||||
# ``build_tools_async`` so it never appears in the system-prompt
|
||||
# tool list (which is built from the registry, not the bound tool
|
||||
# list).
|
||||
_flags: AgentFeatureFlags = get_flags()
|
||||
if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
|
||||
t.name for t in tools
|
||||
|
|
@ -455,10 +456,10 @@ async def create_surfsense_deep_agent(
|
|||
return agent
|
||||
|
||||
|
||||
# Tier 1.1: tools whose output is too costly / lossy to discard. Keep
|
||||
# this conservative — anything listed here is *never* pruned by
|
||||
# ContextEditingMiddleware. The list is filtered against actually-bound
|
||||
# tool names so disabled connectors don't show up here.
|
||||
# Tools whose output is too costly / lossy to discard. Keep this
|
||||
# conservative — anything listed here is *never* pruned by
|
||||
# :class:`ContextEditingMiddleware`. The list is filtered against
|
||||
# actually-bound tool names so disabled connectors don't show up here.
|
||||
_PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
|
||||
{
|
||||
"generate_report",
|
||||
|
|
@ -485,11 +486,12 @@ def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
|
|||
return tuple(name for name in _PRUNE_PROTECTED_TOOL_NAMES if name in enabled)
|
||||
|
||||
|
||||
# Tier 2.1 / cleanup: opencode `Permission.disabled` parity. Replaces the
|
||||
# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE``-based gating with a
|
||||
# declarative pass over :data:`BUILTIN_TOOLS`. Each tool that declares a
|
||||
# ``required_connector`` not present in ``available_connectors`` gets a
|
||||
# deny rule so any execution attempt short-circuits with permission_denied.
|
||||
# Connector gating: any tool whose ``ToolDefinition.required_connector``
|
||||
# isn't actually wired up gets a synthesized permission deny rule so
|
||||
# execution attempts short-circuit with ``permission_denied`` instead of
|
||||
# bubbling up provider-specific 401/404 errors. Mirrors OpenCode's
|
||||
# ``Permission.disabled`` (declarative, per-tool gating) — replaces the
|
||||
# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE`` substring-heuristic.
|
||||
def _synthesize_connector_deny_rules(
|
||||
*,
|
||||
available_connectors: list[str] | None,
|
||||
|
|
@ -503,11 +505,6 @@ def _synthesize_connector_deny_rules(
|
|||
1. It is currently bound (``enabled_tool_names``).
|
||||
2. It declares a ``required_connector``.
|
||||
3. That connector is *not* in ``available_connectors``.
|
||||
|
||||
This expresses the OpenCode ``Permission.disabled`` semantics
|
||||
declaratively, replacing the substring-heuristic binary gating
|
||||
that used to consult the hardcoded ``_CONNECTOR_TYPE_TO_SEARCHABLE``
|
||||
map.
|
||||
"""
|
||||
available = set(available_connectors or [])
|
||||
deny: list[Rule] = []
|
||||
|
|
@ -581,7 +578,7 @@ def _build_compiled_agent_blocking(
|
|||
"middleware": gp_middleware,
|
||||
}
|
||||
|
||||
# Tier 4.3: specialized user-facing subagents (explore, report_writer,
|
||||
# Specialized user-facing subagents (explore, report_writer,
|
||||
# connector_negotiator). Registered through SubAgentMiddleware alongside
|
||||
# the general-purpose spec so the parent's `task` tool can address them
|
||||
# by name. Off by default until the flag flips so existing deployments
|
||||
|
|
@ -629,14 +626,13 @@ def _build_compiled_agent_blocking(
|
|||
# ``wrap_model_call`` ordering: the FIRST middleware in the list is the
|
||||
# OUTERMOST wrapper. To ensure prune executes before summarization,
|
||||
# place ``SpillingContextEditingMiddleware`` before
|
||||
# ``SurfSenseCompactionMiddleware`` (Tier 1.1 + 1.3).
|
||||
# Compaction is the canonical token-budget defense after the
|
||||
# cleanup tier removed ``SafeSummarizationMiddleware``. The Bedrock
|
||||
# buffer-empty defense is folded into ``SurfSenseCompactionMiddleware``.
|
||||
# ``SurfSenseCompactionMiddleware``. Compaction is the canonical
|
||||
# token-budget defense; the Bedrock buffer-empty defense is folded
|
||||
# into ``SurfSenseCompactionMiddleware``.
|
||||
summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend)
|
||||
_ = flags.enable_compaction_v2 # historical flag; retained for telemetry parity
|
||||
|
||||
# Tier 1.1: ContextEditing prune. Trigger at 55% of model_max_input,
|
||||
# ContextEditing prune. Trigger at 55% of ``max_input_tokens``,
|
||||
# earlier than summarization (~85%). When disabled, no edit runs.
|
||||
context_edit_mw = None
|
||||
if (
|
||||
|
|
@ -664,7 +660,10 @@ def _build_compiled_agent_blocking(
|
|||
backend_resolver=backend_resolver,
|
||||
)
|
||||
|
||||
# Tier 1.4 / 1.8 / 1.9 / 1.10: built-in retry/fallback/limits.
|
||||
# Resilience knobs: header-aware retry, model fallback, and
|
||||
# per-thread / per-run call-count limits. The fallback / limit
|
||||
# middlewares are vanilla LangChain primitives; ``RetryAfter`` is
|
||||
# SurfSense's header-aware variant (see its module docstring).
|
||||
retry_mw = (
|
||||
RetryAfterMiddleware(max_retries=3)
|
||||
if flags.enable_retry_after and not flags.disable_new_agent_stack
|
||||
|
|
@ -700,14 +699,16 @@ def _build_compiled_agent_blocking(
|
|||
else None
|
||||
)
|
||||
|
||||
# Tier 1.5: provider-compat _noop injection.
|
||||
# Provider-compat ``_noop`` injection (mirrors OpenCode's
|
||||
# ``llm.ts`` workaround for providers that reject empty assistant
|
||||
# turns or alternating-role constraints).
|
||||
noop_mw = (
|
||||
NoopInjectionMiddleware()
|
||||
if flags.enable_compaction_v2 and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 1.7: tool-call name repair (lowercase + invalid fallback).
|
||||
# Tool-call name repair (lowercase + ``invalid`` fallback).
|
||||
#
|
||||
# ``registered_tool_names`` MUST cover every tool the model can legitimately
|
||||
# call. That includes the bound ``tools`` list AND every tool provided by
|
||||
|
|
@ -737,18 +738,22 @@ def _build_compiled_agent_blocking(
|
|||
}
|
||||
repair_mw = ToolCallNameRepairMiddleware(
|
||||
registered_tool_names=registered_names,
|
||||
fuzzy_match_threshold=None, # opencode parity: no fuzzy step
|
||||
# Disable fuzzy matching to avoid silent rewrites; the
|
||||
# lowercase + ``invalid`` fallback alone covers >95% of
|
||||
# observed model errors.
|
||||
fuzzy_match_threshold=None,
|
||||
)
|
||||
|
||||
# Tier 1.11: doom-loop detector. Off by default until UI handles.
|
||||
# Doom-loop detector. Off by default until the frontend handles
|
||||
# ``permission == "doom_loop"`` interrupts.
|
||||
doom_loop_mw = (
|
||||
DoomLoopMiddleware(threshold=3)
|
||||
if flags.enable_doom_loop and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 2.1: PermissionMiddleware. Layers, earliest -> latest (last
|
||||
# match wins per opencode):
|
||||
# PermissionMiddleware. Layers, earliest -> latest (last match wins,
|
||||
# same evaluation order as OpenCode's ``permission/index.ts``):
|
||||
#
|
||||
# 1. ``surfsense_defaults`` — single ``allow */*`` rule. SurfSense
|
||||
# already runs per-tool HITL (see ``tools/hitl.py``) for mutating
|
||||
|
|
@ -778,11 +783,11 @@ def _build_compiled_agent_blocking(
|
|||
],
|
||||
)
|
||||
|
||||
# Tier 5.2: ActionLogMiddleware. Off by default until the
|
||||
# ``agent_action_log`` table is migrated. When enabled, persists one
|
||||
# row per tool call with optional reverse_descriptor for
|
||||
# /api/threads/{thread_id}/revert/{action_id}. Sits inside permission
|
||||
# so denied calls aren't logged as completions.
|
||||
# ActionLogMiddleware. Off by default until the ``agent_action_log``
|
||||
# table is migrated. When enabled, persists one row per tool call
|
||||
# with an optional ``reverse_descriptor`` payload for
|
||||
# ``POST /api/threads/{thread_id}/revert/{action_id}``. Sits inside
|
||||
# ``permission`` so denied calls aren't logged as completions.
|
||||
action_log_mw: ActionLogMiddleware | None = None
|
||||
if (
|
||||
flags.enable_action_log
|
||||
|
|
@ -804,23 +809,24 @@ def _build_compiled_agent_blocking(
|
|||
)
|
||||
action_log_mw = None
|
||||
|
||||
# Tier 2.2: per-thread busy mutex.
|
||||
# Per-thread busy mutex (refuse a second concurrent turn on the same
|
||||
# thread; see :class:`BusyMutexMiddleware` docstring).
|
||||
busy_mutex_mw: BusyMutexMiddleware | None = (
|
||||
BusyMutexMiddleware()
|
||||
if flags.enable_busy_mutex and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 3b: OpenTelemetry spans (model.call + tool.call). Lives just
|
||||
# inside BusyMutex so it spans every retry/fallback attempt of the
|
||||
# current turn but never wraps a queued/blocked turn.
|
||||
# OpenTelemetry spans (model.call + tool.call). Lives just inside
|
||||
# BusyMutex so it spans every retry/fallback attempt of the current
|
||||
# turn but never wraps a queued/blocked turn.
|
||||
otel_mw: OtelSpanMiddleware | None = (
|
||||
OtelSpanMiddleware()
|
||||
if flags.enable_otel and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 6: plugin entry-point loader. Off by default; opt-in via the
|
||||
# Plugin entry-point loader. Off by default; opt-in via the
|
||||
# ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. The allowlist is read from
|
||||
# the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future
|
||||
# PR can wire it through ``global_llm_config.yaml``.
|
||||
|
|
@ -845,10 +851,10 @@ def _build_compiled_agent_blocking(
|
|||
)
|
||||
plugin_middlewares = []
|
||||
|
||||
# Tier 4.1: SkillsMiddleware. Loads built-in + space-authored skills
|
||||
# via a CompositeBackend. Sources are layered: built-in first, space
|
||||
# last, so a search-space-authored skill of the same name overrides
|
||||
# the bundled one.
|
||||
# SkillsMiddleware (deepagents) loads built-in + space-authored
|
||||
# skills via a CompositeBackend. Sources are layered: built-in first,
|
||||
# space last, so a search-space-authored skill of the same name
|
||||
# overrides the bundled one.
|
||||
skills_mw: SkillsMiddleware | None = None
|
||||
if flags.enable_skills and not flags.disable_new_agent_stack:
|
||||
try:
|
||||
|
|
@ -865,7 +871,8 @@ def _build_compiled_agent_blocking(
|
|||
logging.warning("SkillsMiddleware init failed; skipping: %s", exc)
|
||||
skills_mw = None
|
||||
|
||||
# Tier 2.5: LLM-driven tool selection for >30 tools.
|
||||
# LangChain's LLM-driven tool selection — only enabled for stacks
|
||||
# large enough to need narrowing (>30 tools).
|
||||
selector_mw: LLMToolSelectorMiddleware | None = None
|
||||
if (
|
||||
flags.enable_llm_tool_selector
|
||||
|
|
@ -934,12 +941,12 @@ def _build_compiled_agent_blocking(
|
|||
)
|
||||
if filesystem_mode == FilesystemMode.CLOUD
|
||||
else None,
|
||||
# Tier 4.1: skill loader. Placed before SubAgentMiddleware so
|
||||
# subagents inherit the same skill metadata (subagent specs reference
|
||||
# the same source paths via `default_skills_sources()`).
|
||||
# Skill loader. Placed before SubAgentMiddleware so subagents
|
||||
# inherit the same skill metadata (subagent specs reference the
|
||||
# same source paths via ``default_skills_sources()``).
|
||||
skills_mw,
|
||||
SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs),
|
||||
# Tier 2.5: tool selection (only when >30 tools and flag on).
|
||||
# Tool selection (only when >30 tools and flag on).
|
||||
selector_mw,
|
||||
# Defensive caps, then prune, then summarize.
|
||||
model_call_limit_mw,
|
||||
|
|
@ -954,19 +961,19 @@ def _build_compiled_agent_blocking(
|
|||
# Tool-call repair must run after model emits but before
|
||||
# permission / dedup / doom-loop interpret the calls.
|
||||
repair_mw,
|
||||
# Tier 2.1: deny/ask BEFORE the calls are forwarded to tool nodes.
|
||||
# Permission deny/ask BEFORE the calls are forwarded to tool nodes.
|
||||
permission_mw,
|
||||
doom_loop_mw,
|
||||
# Tier 5.2: action log sits inside permission so denied calls
|
||||
# don't appear as completions, and outside dedup so each unique
|
||||
# tool invocation gets its own row.
|
||||
# Action log sits inside permission so denied calls don't appear
|
||||
# as completions, and outside dedup so each unique tool invocation
|
||||
# gets its own row.
|
||||
action_log_mw,
|
||||
PatchToolCallsMiddleware(),
|
||||
DedupHITLToolCallsMiddleware(agent_tools=list(tools)),
|
||||
# Tier 6: plugin slot — sits just before AnthropicCache so plugin-side
|
||||
# transforms see the final tool result and run before any caching
|
||||
# heuristics. Multiple plugins in declared order; loader filtered by
|
||||
# the admin allowlist already.
|
||||
# Plugin slot — sits just before AnthropicCache so plugin-side
|
||||
# transforms see the final tool result and run before any
|
||||
# caching heuristics. Multiple plugins in declared order; loader
|
||||
# filtered by the admin allowlist already.
|
||||
*plugin_middlewares,
|
||||
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@
|
|||
Typed error taxonomy for the SurfSense agent stack.
|
||||
|
||||
Used by:
|
||||
- :class:`RetryAfterMiddleware` (Tier 1.4) — its ``retry_on`` callable
|
||||
consults the error code to decide whether a retry is appropriate.
|
||||
- :class:`PermissionMiddleware` (Tier 2.1) — emits
|
||||
``code="permission_denied"`` errors when a deny rule trips.
|
||||
- :class:`RetryAfterMiddleware` — its ``retry_on`` callable consults
|
||||
the error code to decide whether a retry is appropriate.
|
||||
- :class:`PermissionMiddleware` — emits ``code="permission_denied"``
|
||||
errors when a deny rule trips.
|
||||
- All tools — return :class:`StreamingError` payloads in
|
||||
``ToolMessage.additional_kwargs["error"]`` so the model and the
|
||||
retry/permission layers share a contract.
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
"""
|
||||
Feature flags for the SurfSense new_chat agent stack.
|
||||
|
||||
These flags control rollout of OpenCode-pattern middleware ported into
|
||||
SurfSense. They follow a "default-OFF for risky things, default-ON for
|
||||
safe upgrades, master kill-switch for everything new" model.
|
||||
These flags gate the newer agent middleware (some ported from OpenCode,
|
||||
some sourced from ``langchain.agents.middleware`` / ``deepagents``, some
|
||||
SurfSense-native). They follow a "default-OFF for risky things,
|
||||
default-ON for safe upgrades, master kill-switch for everything new" model.
|
||||
|
||||
All new middleware checks its flag at agent build time. If the master
|
||||
kill-switch ``SURFSENSE_DISABLE_NEW_AGENT_STACK`` is set, every new
|
||||
|
|
@ -57,7 +58,7 @@ class AgentFeatureFlags:
|
|||
# regardless of its env value. Used for rapid rollback.
|
||||
disable_new_agent_stack: bool = False
|
||||
|
||||
# Tier 1 — Agent quality
|
||||
# Agent quality — context budget, retry/limits, name-repair, doom-loop
|
||||
enable_context_editing: bool = False
|
||||
enable_compaction_v2: bool = False
|
||||
enable_retry_after: bool = False
|
||||
|
|
@ -69,26 +70,26 @@ class AgentFeatureFlags:
|
|||
False # Default OFF until UI handles permission='doom_loop'
|
||||
)
|
||||
|
||||
# Tier 2 — Safety
|
||||
# Safety — permissions, concurrency, tool-set narrowing
|
||||
enable_permission: bool = False # Default OFF for first deploy
|
||||
enable_busy_mutex: bool = False
|
||||
enable_llm_tool_selector: bool = False # Default OFF — adds per-turn LLM cost
|
||||
|
||||
# Tier 4 — Skills + subagents
|
||||
# Skills + subagents
|
||||
enable_skills: bool = False
|
||||
enable_specialized_subagents: bool = False
|
||||
enable_kb_planner_runnable: bool = False
|
||||
|
||||
# Tier 5 — Snapshot / revert
|
||||
# Snapshot / revert
|
||||
enable_action_log: bool = False
|
||||
enable_revert_route: bool = (
|
||||
False # Backend ships before UI; route returns 503 until this flips
|
||||
)
|
||||
|
||||
# Tier 6 — Plugins
|
||||
# Plugins
|
||||
enable_plugin_loader: bool = False
|
||||
|
||||
# Tier 3b — OTel (orthogonal: also requires OTEL_EXPORTER_OTLP_ENDPOINT)
|
||||
# Observability — OTel (orthogonal; also requires OTEL_EXPORTER_OTLP_ENDPOINT)
|
||||
enable_otel: bool = False
|
||||
|
||||
@classmethod
|
||||
|
|
@ -108,7 +109,7 @@ class AgentFeatureFlags:
|
|||
|
||||
return cls(
|
||||
disable_new_agent_stack=False,
|
||||
# Tier 1
|
||||
# Agent quality
|
||||
enable_context_editing=_env_bool("SURFSENSE_ENABLE_CONTEXT_EDITING", False),
|
||||
enable_compaction_v2=_env_bool("SURFSENSE_ENABLE_COMPACTION_V2", False),
|
||||
enable_retry_after=_env_bool("SURFSENSE_ENABLE_RETRY_AFTER", False),
|
||||
|
|
@ -121,13 +122,13 @@ class AgentFeatureFlags:
|
|||
"SURFSENSE_ENABLE_TOOL_CALL_REPAIR", False
|
||||
),
|
||||
enable_doom_loop=_env_bool("SURFSENSE_ENABLE_DOOM_LOOP", False),
|
||||
# Tier 2
|
||||
# Safety
|
||||
enable_permission=_env_bool("SURFSENSE_ENABLE_PERMISSION", False),
|
||||
enable_busy_mutex=_env_bool("SURFSENSE_ENABLE_BUSY_MUTEX", False),
|
||||
enable_llm_tool_selector=_env_bool(
|
||||
"SURFSENSE_ENABLE_LLM_TOOL_SELECTOR", False
|
||||
),
|
||||
# Tier 4
|
||||
# Skills + subagents
|
||||
enable_skills=_env_bool("SURFSENSE_ENABLE_SKILLS", False),
|
||||
enable_specialized_subagents=_env_bool(
|
||||
"SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", False
|
||||
|
|
@ -135,12 +136,12 @@ class AgentFeatureFlags:
|
|||
enable_kb_planner_runnable=_env_bool(
|
||||
"SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", False
|
||||
),
|
||||
# Tier 5
|
||||
# Snapshot / revert
|
||||
enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", False),
|
||||
enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", False),
|
||||
# Tier 6
|
||||
# Plugins
|
||||
enable_plugin_loader=_env_bool("SURFSENSE_ENABLE_PLUGIN_LOADER", False),
|
||||
# Tier 3b
|
||||
# Observability
|
||||
enable_otel=_env_bool("SURFSENSE_ENABLE_OTEL", False),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,16 @@
|
|||
"""
|
||||
BusyMutexMiddleware — per-thread asyncio lock + cancel token.
|
||||
|
||||
Tier 2.2 in the OpenCode-port plan. Mirrors opencode's
|
||||
``Stream.scoped(AbortController)`` pattern (single-process, in-memory
|
||||
lock + cooperative cancellation). For multi-worker deployments a
|
||||
distributed lock backend (Redis or PostgreSQL advisory locks) is a
|
||||
phase-2 follow-up.
|
||||
LangChain has no built-in concept of "this thread is already running a
|
||||
turn — refuse the second concurrent request". Without it, a user
|
||||
double-clicking "send" or refreshing the page mid-stream can spawn two
|
||||
turns racing on the same checkpoint, producing duplicated tool calls
|
||||
and mangled state.
|
||||
|
||||
Ported from OpenCode's ``Stream.scoped(AbortController)`` pattern: a
|
||||
single-process, in-memory lock + cooperative cancellation token keyed by
|
||||
``thread_id``. For multi-worker deployments a distributed lock backend
|
||||
(Redis or PostgreSQL advisory locks) is a phase-2 follow-up.
|
||||
|
||||
What this provides:
|
||||
- A ``WeakValueDictionary[str, asyncio.Lock]`` keyed by ``thread_id``;
|
||||
|
|
|
|||
|
|
@ -5,21 +5,22 @@ Subclasses :class:`deepagents.middleware.summarization.SummarizationMiddleware`
|
|||
to add SurfSense-specific behavior:
|
||||
|
||||
1. **Structured summary template** (OpenCode-style ``## Goal / Constraints /
|
||||
Progress / Key Decisions / Next Steps / Critical Context / Relevant Files``).
|
||||
Progress / Key Decisions / Next Steps / Critical Context / Relevant Files``)
|
||||
— see :data:`SURFSENSE_SUMMARY_PROMPT` below. The base
|
||||
``SummarizationMiddleware`` only ships a freeform "summarize this"
|
||||
prompt; the structured template is ported from OpenCode's
|
||||
``compaction.ts``.
|
||||
2. **Protect SurfSense-specific SystemMessages** so injected hints
|
||||
(``<priority_documents>``, ``<workspace_tree>``, ``<file_operation_contract>``,
|
||||
``<user_memory>``, ``<team_memory>``, ``<user_name>``, ``<memory_warning>``)
|
||||
are *not* summarized away and are kept verbatim in the post-summary
|
||||
message list.
|
||||
message list. Mirrors OpenCode's ``PRUNE_PROTECTED_TOOLS`` philosophy
|
||||
(some message types are part of the agent's contract and must survive
|
||||
compaction unchanged).
|
||||
3. **Sanitize ``content=None``** when feeding messages into ``get_buffer_string``
|
||||
(Azure OpenAI / LiteLLM defense — when a provider streams an AIMessage
|
||||
containing only tool_calls and no text, ``content`` can be ``None`` and
|
||||
``get_buffer_string`` crashes iterating over ``None``). This used to live in
|
||||
``safe_summarization.py``; folded in here.
|
||||
|
||||
This replaces ``app.agents.new_chat.middleware.safe_summarization``.
|
||||
|
||||
Tier 1.3 in the OpenCode-port plan.
|
||||
``get_buffer_string`` crashes iterating over ``None``). SurfSense-specific.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -42,7 +43,7 @@ if TYPE_CHECKING:
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# OpenCode-faithful structured summary template. Mirrors
|
||||
# Structured summary template ported from OpenCode's
|
||||
# ``opencode/packages/opencode/src/session/compaction.ts:40-75``. Kept as a
|
||||
# module-level constant so unit tests can assert on its sections.
|
||||
SURFSENSE_SUMMARY_PROMPT = """<role>
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
"""
|
||||
SpillToBackendEdit + SpillingContextEditingMiddleware.
|
||||
|
||||
Mirrors OpenCode's spill-to-disk behavior in
|
||||
``opencode/packages/opencode/src/tool/truncate.ts``. Before
|
||||
``ClearToolUsesEdit`` rewrites old ``ToolMessage.content`` to a placeholder,
|
||||
we capture the full original content and write it to the runtime backend
|
||||
under ``/tool_outputs/{thread_id}/{message_id}.txt``. The placeholder is
|
||||
upgraded to ``"[cleared — full output at /tool_outputs/.../{id}.txt; ask the
|
||||
explore subagent to read it]"`` so the agent can recover it on demand.
|
||||
|
||||
Tier 1.2 in the OpenCode-port plan.
|
||||
LangChain's :class:`ClearToolUsesEdit` discards old ``ToolMessage.content``
|
||||
when the context-editing budget triggers, replacing the body with a fixed
|
||||
placeholder. That's lossy: anything the agent might want to revisit is
|
||||
gone. The spill-to-disk pattern (originally from OpenCode's
|
||||
``opencode/packages/opencode/src/tool/truncate.ts``) keeps the prune
|
||||
behavior but writes the full original payload to the runtime backend
|
||||
under ``/tool_outputs/{thread_id}/{message_id}.txt`` first. The
|
||||
placeholder is then upgraded to point at the spill path so the agent
|
||||
(or a subagent) can read it back on demand.
|
||||
|
||||
Why this is a middleware subclass instead of a plain ``ContextEdit``:
|
||||
``ContextEdit.apply`` is sync, but writing to the backend is async. We
|
||||
|
|
|
|||
|
|
@ -9,11 +9,10 @@ the duplicate call is stripped from the AIMessage that gets checkpointed.
|
|||
That means it is also safe across LangGraph ``interrupt()`` boundaries:
|
||||
the removed call will never appear on graph resume.
|
||||
|
||||
Dedup-key resolution order (Tier 2.3 / cleanup in the OpenCode-port plan):
|
||||
Dedup-key resolution order:
|
||||
|
||||
1. :class:`ToolDefinition.dedup_key` — callable provided by the registry
|
||||
entry. This is the canonical mechanism after the cleanup-tier removal
|
||||
of the legacy ``PRIMARY_ARG`` map.
|
||||
entry. This is the canonical mechanism.
|
||||
2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg name;
|
||||
used by MCP / Composio tools whose schemas the registry doesn't see.
|
||||
|
||||
|
|
@ -72,9 +71,8 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
|||
The dedup-resolver map is built from two sources, in priority order:
|
||||
|
||||
1. ``tool.metadata["dedup_key"]`` — callable provided by the registry's
|
||||
``ToolDefinition.dedup_key`` (Tier 2.3). Receives the args dict
|
||||
and returns a string signature. This is the canonical mechanism
|
||||
after the cleanup-tier removal of the legacy ``PRIMARY_ARG`` map.
|
||||
``ToolDefinition.dedup_key``. Receives the args dict and returns
|
||||
a string signature. This is the canonical mechanism.
|
||||
2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg
|
||||
name; primarily used by MCP / Composio tools.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,17 +1,19 @@
|
|||
"""
|
||||
DoomLoopMiddleware — pattern-based detector for repeated identical tool calls.
|
||||
|
||||
Mirrors ``opencode/packages/opencode/src/session/processor.ts`` doom-loop
|
||||
behavior. When the same tool with the same arguments is called N times
|
||||
in a row, the agent has likely entered an infinite loop. We surface this
|
||||
to the user as an interrupt with ``permission="doom_loop"`` so the UI
|
||||
can render an "Are you stuck? Continue / cancel?" affordance.
|
||||
LangChain has :class:`ToolCallLimitMiddleware` which caps the *total* number
|
||||
of tool calls per turn — but it can't tell apart "10 distinct, useful
|
||||
calls" from "the same call 10 times in a row". This middleware fills that
|
||||
gap with a sliding-window check on tool-call signatures, ported from
|
||||
OpenCode's ``packages/opencode/src/session/processor.ts``.
|
||||
|
||||
Tier 1.11 in the OpenCode-port plan.
|
||||
When the same tool with the same arguments is called N times in a row,
|
||||
the agent has likely entered an infinite loop. We surface this to the
|
||||
user as an interrupt with ``permission="doom_loop"`` so the UI can
|
||||
render an "Are you stuck? Continue / cancel?" affordance.
|
||||
|
||||
This ships **OFF by default** until the frontend explicitly handles
|
||||
``context.permission == "doom_loop"`` interrupts (the plan flips
|
||||
``SURFSENSE_ENABLE_DOOM_LOOP=true`` once the UI is ready).
|
||||
``context.permission == "doom_loop"`` interrupts.
|
||||
|
||||
Wire format: uses SurfSense's existing ``interrupt()`` payload shape
|
||||
(see ``app/agents/new_chat/tools/hitl.py``):
|
||||
|
|
@ -69,7 +71,7 @@ class DoomLoopMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respon
|
|||
|
||||
Args:
|
||||
threshold: How many consecutive identical signatures count as a
|
||||
doom loop. Default 3 (opencode parity).
|
||||
doom loop. Default 3 (matches OpenCode's ``processor.ts``).
|
||||
"""
|
||||
|
||||
def __init__(self, *, threshold: int = 3) -> None:
|
||||
|
|
@ -182,7 +184,7 @@ class DoomLoopMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respon
|
|||
signatures[-1] if signatures else "<empty>",
|
||||
)
|
||||
|
||||
# Tier 3b: interrupt.raised span with permission=doom_loop attribute
|
||||
# Open an ``interrupt.raised`` span with a ``permission=doom_loop`` attribute
|
||||
# so dashboards can break out doom-loop interrupts from regular
|
||||
# permission asks via the ``interrupt.permission`` attribute.
|
||||
with ot.interrupt_span(
|
||||
|
|
|
|||
|
|
@ -592,10 +592,11 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
|||
self.available_document_types = available_document_types
|
||||
self.top_k = top_k
|
||||
self.mentioned_document_ids = mentioned_document_ids or []
|
||||
# Tier 4.2: build the kb-planner private Runnable ONCE here so we
|
||||
# don't pay the create_agent compile cost (50-200ms) on every turn.
|
||||
# Disabled by default behind ``enable_kb_planner_runnable``; when off
|
||||
# the planner falls back to the legacy ``self.llm.ainvoke`` path.
|
||||
# Build the kb-planner private Runnable ONCE here so we don't pay
|
||||
# the ``create_agent`` compile cost (50-200ms) on every turn.
|
||||
# Disabled by default behind ``enable_kb_planner_runnable``; when
|
||||
# off the planner falls back to the legacy ``self.llm.ainvoke``
|
||||
# path.
|
||||
self._planner: Runnable | None = None
|
||||
self._planner_compile_failed = False
|
||||
|
||||
|
|
@ -608,9 +609,9 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
|||
lazily on first call, then memoized via ``self._planner``.
|
||||
|
||||
The compiled agent is constructed without tools — the planner's
|
||||
contract is "answer with structured JSON" — but with ``RetryAfter``
|
||||
+ the OpenCode-port retry/limit middleware so it shares the parent
|
||||
agent's resilience guarantees.
|
||||
contract is "answer with structured JSON" — but it inherits the
|
||||
:class:`RetryAfterMiddleware` so transient rate-limit errors
|
||||
from the planner LLM call don't fail the whole turn.
|
||||
"""
|
||||
if self._planner is not None or self._planner_compile_failed:
|
||||
return self._planner
|
||||
|
|
@ -658,9 +659,9 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
|||
loop = asyncio.get_running_loop()
|
||||
t0 = loop.time()
|
||||
|
||||
# Tier 4.2: prefer the compiled-once planner Runnable when enabled;
|
||||
# otherwise fall back to ``self.llm.ainvoke``. The ``surfsense:internal``
|
||||
# tag is preserved on both paths so ``_stream_agent_events`` still
|
||||
# Prefer the compiled-once planner Runnable when enabled; otherwise
|
||||
# fall back to ``self.llm.ainvoke``. The ``surfsense:internal`` tag
|
||||
# is preserved on both paths so ``_stream_agent_events`` still
|
||||
# suppresses the planner's intermediate events from the UI.
|
||||
planner = self._build_kb_planner_runnable()
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1,18 +1,23 @@
|
|||
"""
|
||||
``_noop`` provider-compatibility tool + injection middleware.
|
||||
|
||||
OpenCode injects a ``_noop`` tool for LiteLLM/Bedrock/Copilot when the
|
||||
model call has empty tools but message history includes prior
|
||||
``tool_calls`` — some providers 400 in that shape (see
|
||||
``opencode/packages/opencode/src/session/llm.ts:209-228``). SurfSense uses
|
||||
LiteLLM, and the compaction summarize call (no tools, history full of
|
||||
tool calls) hits this. Tier 1.5 in the OpenCode-port plan.
|
||||
Some providers (LiteLLM, Bedrock, Copilot) 400 when a model call has
|
||||
empty ``tools`` but the message history includes prior ``tool_calls`` —
|
||||
they treat that shape as malformed even though it's perfectly valid
|
||||
LangChain. SurfSense hits this on the compaction summarize call (no
|
||||
tools, history full of tool calls).
|
||||
|
||||
Ported from OpenCode's ``packages/opencode/src/session/llm.ts:209-228``,
|
||||
which discovered and codified the workaround: inject a no-op tool *only*
|
||||
on those provider shapes so the request validates without the tool ever being
|
||||
called.
|
||||
|
||||
Operation: a :class:`NoopInjectionMiddleware` ``wrap_model_call`` checks
|
||||
if the request has zero tools but the last AI message in history includes
|
||||
``tool_calls``. If yes, it injects the ``_noop`` tool only — never globally,
|
||||
mirroring opencode's gating exactly. The :func:`noop_tool` returns empty
|
||||
content when called (which it should never be in practice).
|
||||
``tool_calls``. If yes, it injects the ``_noop`` tool only — never
|
||||
globally — mirroring OpenCode's gating exactly. The :func:`noop_tool`
|
||||
returns empty content when called (which it should never be in
|
||||
practice).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -45,8 +50,9 @@ def noop_tool() -> str:
|
|||
|
||||
|
||||
# Provider markers that benefit from ``_noop`` injection. These match
|
||||
# opencode's gating list. We also accept any string containing one of
|
||||
# these substrings (so e.g. ``litellm`` matches ``ChatLiteLLM``).
|
||||
# OpenCode's gating list (``llm.ts:209-228``). We also accept any string
|
||||
# containing one of these substrings so e.g. ``litellm`` matches
|
||||
# ``ChatLiteLLM``.
|
||||
_NOOP_NEEDED_PROVIDERS: tuple[str, ...] = (
|
||||
"litellm",
|
||||
"bedrock",
|
||||
|
|
|
|||
|
|
@ -3,14 +3,14 @@ OpenTelemetry span middleware for the SurfSense ``new_chat`` agent.
|
|||
|
||||
Wraps both ``model.call`` (LLM invocations) and ``tool.call`` (tool
|
||||
executions) with OTel spans, attaching low-cardinality span names and
|
||||
high-cardinality identifiers as attributes (per the Tier 3b plan).
|
||||
high-cardinality identifiers as attributes.
|
||||
|
||||
This middleware is intentionally a thin adapter over
|
||||
:mod:`app.observability.otel`; when OTel is not configured all spans
|
||||
collapse to no-ops and the wrapper adds <1µs overhead per call. When
|
||||
OTel **is** configured (``OTEL_EXPORTER_OTLP_ENDPOINT`` set), every
|
||||
model and tool call gets a span with the standard attributes the
|
||||
plan's dashboards expect.
|
||||
model and tool call gets a span with the standard attributes our
|
||||
dashboards expect.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
|
|||
|
|
@ -1,10 +1,15 @@
|
|||
"""
|
||||
PermissionMiddleware — pattern-based allow/deny/ask with HITL fallback.
|
||||
|
||||
Mirrors ``opencode/packages/opencode/src/permission/index.ts`` but uses
|
||||
SurfSense's existing ``interrupt({type, action, context})`` payload shape
|
||||
(see ``app/agents/new_chat/tools/hitl.py``) so the frontend keeps
|
||||
working unchanged. Tier 2.1 in the OpenCode-port plan.
|
||||
LangChain's :class:`HumanInTheLoopMiddleware` only supports a static
|
||||
"this tool always asks" decision per tool. There's no rule-based
|
||||
allow/deny/ask layered ruleset, no glob patterns, no per-search-space or
|
||||
per-thread overrides, and no auto-deny synthesis.
|
||||
|
||||
This middleware ports OpenCode's ``packages/opencode/src/permission/index.ts``
|
||||
ruleset model on top of SurfSense's existing ``interrupt({type, action,
|
||||
context})`` payload shape (see ``app/agents/new_chat/tools/hitl.py``) so
|
||||
the frontend keeps working unchanged.
|
||||
|
||||
Operation:
|
||||
1. ``aafter_model`` inspects the latest ``AIMessage.tool_calls``.
|
||||
|
|
@ -24,9 +29,9 @@ Operation:
|
|||
|
||||
The middleware also performs a *pre-model* tool-filter step (the
|
||||
``before_model`` hook) so globally denied tools are stripped from the
|
||||
exposed tool list before the model gets to see them. This is
|
||||
opencode's ``Permission.disabled`` equivalent and dramatically reduces
|
||||
the chance the model emits a deny-only call.
|
||||
exposed tool list before the model gets to see them. This mirrors
|
||||
OpenCode's ``Permission.disabled`` and dramatically reduces the chance
|
||||
the model emits a deny-only call.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -117,7 +122,7 @@ class PermissionMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
|||
self._emit_interrupt = always_emit_interrupt_payload
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool-filter step (opencode `Permission.disabled` equivalent)
|
||||
# Tool-filter step (mirrors OpenCode's ``Permission.disabled``)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _globally_denied(self, tool_name: str) -> bool:
|
||||
|
|
@ -197,8 +202,8 @@ class PermissionMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
|||
"always": patterns,
|
||||
},
|
||||
}
|
||||
# Tier 3b: permission.asked + interrupt.raised spans (no-op when
|
||||
# OTel is disabled). Both fire here so dashboards can correlate
|
||||
# Open ``permission.asked`` + ``interrupt.raised`` OTel spans
|
||||
# (no-op when OTel is disabled) so dashboards can correlate
|
||||
# "we asked X" with "interrupt was actually delivered".
|
||||
with (
|
||||
ot.permission_asked_span(
|
||||
|
|
|
|||
|
|
@ -1,10 +1,16 @@
|
|||
"""
|
||||
RetryAfterMiddleware — Header-aware retry with custom backoff and SSE eventing.
|
||||
|
||||
Why standalone instead of subclassing ``ModelRetryMiddleware``: the upstream
|
||||
class calls module-level ``calculate_delay`` inline (no overridable
|
||||
``_calculate_delay`` hook), so a subclass cannot inject Retry-After header
|
||||
delays without rewriting the loop. Tier 1.4 in the OpenCode-port plan.
|
||||
LangChain's :class:`ModelRetryMiddleware` retries on exceptions but ignores
|
||||
the ``Retry-After`` HTTP header — it just runs its own exponential backoff.
|
||||
That wastes time when a provider has explicitly told us how long to wait.
|
||||
This middleware honors the header (mirroring OpenCode's
|
||||
``packages/opencode/src/session/llm.ts`` retry pathway) and emits an SSE
|
||||
event so the UI can show "rate-limited, retrying in Ns".
|
||||
|
||||
We can't subclass ``ModelRetryMiddleware`` cleanly because its loop calls a
|
||||
module-level ``calculate_delay`` inline (no overridable
|
||||
``_calculate_delay`` hook), so this is a standalone implementation.
|
||||
|
||||
Behaviour:
|
||||
- Extracts ``Retry-After`` / ``retry-after-ms`` from
|
||||
|
|
|
|||
|
|
@ -1,10 +1,6 @@
|
|||
"""
|
||||
ToolCallNameRepairMiddleware — two-stage tool-name repair.
|
||||
|
||||
Mirrors ``opencode/packages/opencode/src/session/llm.ts:339-358`` plus
|
||||
``opencode/packages/opencode/src/tool/invalid.ts``. Tier 1.7 in the
|
||||
OpenCode-port plan.
|
||||
|
||||
Operation:
|
||||
1. **Stage 1 — lowercase repair:** if a tool call's ``name`` is not in
|
||||
the registry but ``name.lower()`` is, rewrite in place. Catches
|
||||
|
|
@ -14,9 +10,13 @@ Operation:
|
|||
so the registered :func:`invalid_tool` returns the error to the model
|
||||
for self-correction.
|
||||
|
||||
Distinct from :class:`deepagents.middleware.PatchToolCallsMiddleware`,
|
||||
which patches *dangling* tool calls (no matching ToolMessage) — that
|
||||
class does not handle the wrong-name case at all.
|
||||
Ported from OpenCode's ``packages/opencode/src/session/llm.ts:339-358``
|
||||
+ ``packages/opencode/src/tool/invalid.ts``. LangChain has no equivalent:
|
||||
:class:`deepagents.middleware.PatchToolCallsMiddleware` patches
|
||||
*dangling* tool calls (no matching ToolMessage) but does nothing about
|
||||
wrong names, and the model framework's default behavior on an unknown
|
||||
name is to crash the turn rather than route to a self-correction
|
||||
fallback.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -61,7 +61,8 @@ class ToolCallNameRepairMiddleware(
|
|||
``invalid`` should be in this set so the fallback dispatches.
|
||||
fuzzy_match_threshold: Optional ``difflib`` ratio (0-1) for the
|
||||
fuzzy-match step that runs *between* lowercase and invalid.
|
||||
Set to ``None`` to disable fuzzy matching (opencode parity).
|
||||
Set to ``None`` to disable fuzzy matching (default in
|
||||
OpenCode; we mirror that to avoid silent rewrites).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
|
@ -106,7 +107,7 @@ class ToolCallNameRepairMiddleware(
|
|||
call["response_metadata"] = metadata
|
||||
return call
|
||||
|
||||
# Optional fuzzy step (off by default for opencode parity)
|
||||
# Optional fuzzy step (off by default — see class docstring)
|
||||
if self._fuzzy_threshold is not None:
|
||||
close = difflib.get_close_matches(
|
||||
name, registered, n=1, cutoff=self._fuzzy_threshold
|
||||
|
|
|
|||
|
|
@ -1,21 +1,20 @@
|
|||
"""
|
||||
Wildcard pattern matching + rule evaluation for the SurfSense permission system.
|
||||
|
||||
Mirrors ``opencode/packages/opencode/src/permission/evaluate.ts`` and
|
||||
``opencode/packages/opencode/src/util/wildcard.ts`` precisely:
|
||||
Ported from OpenCode's ``packages/opencode/src/permission/evaluate.ts`` and
|
||||
``packages/opencode/src/util/wildcard.ts``. LangChain has no rule-based
|
||||
permission evaluator, so we keep OpenCode's semantics intact:
|
||||
|
||||
- ``Wildcard.match`` matches both the ``permission`` and the ``pattern``
|
||||
fields of a rule against the requested ``(permission, pattern)`` pair.
|
||||
``*`` matches any segment, ``**`` matches across separators.
|
||||
- The evaluator runs ``findLast`` over the **flattened** list of rules
|
||||
from all rulesets — last matching rule wins.
|
||||
- The default fallback is ``ask`` (NOT deny), matching opencode.
|
||||
- The default fallback is ``ask`` (NOT deny), matching OpenCode.
|
||||
- Multi-pattern requests AND together: if ANY pattern resolves to
|
||||
``deny``, the whole request is denied; if ANY needs ``ask``, an
|
||||
interrupt is raised; only when all patterns ``allow`` does the
|
||||
request proceed.
|
||||
|
||||
Tier 2.1 in the OpenCode-port plan.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
"""Entry-point based plugin loader for SurfSense agent middleware.
|
||||
|
||||
The realization in the Tier 6 plan: LangChain's :class:`AgentMiddleware` ABC
|
||||
already covers the practical surface most plugins need (``before_agent`` /
|
||||
``before_model`` / ``wrap_tool_call`` / their async counterparts), so a
|
||||
SurfSense-specific plugin protocol is unnecessary.
|
||||
LangChain's :class:`AgentMiddleware` ABC already covers the practical
|
||||
surface most plugins need (``before_agent`` / ``before_model`` /
|
||||
``wrap_tool_call`` / their async counterparts), so a SurfSense-specific
|
||||
plugin protocol would be redundant. We just need a way to discover and
|
||||
admit third-party middleware safely.
|
||||
|
||||
A plugin is therefore just an installable Python package that registers a
|
||||
factory callable under the ``surfsense.plugins`` entry-point group:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
"""Reference plugin: substitute ``{{year}}`` in tool descriptions.
|
||||
|
||||
Mirrors the OpenCode ``chat.system.transform`` example. Demonstrates the
|
||||
:meth:`AgentMiddleware.awrap_tool_call` hook -- the plugin sees every tool
|
||||
invocation and can rewrite the request *or* the result. This particular
|
||||
plugin is read-only and only transforms the *description* the user might
|
||||
see in error messages (no request mutation).
|
||||
Demonstrates the :meth:`AgentMiddleware.awrap_tool_call` hook -- the
|
||||
plugin sees every tool invocation and can rewrite the request *or* the
|
||||
result. This particular plugin is read-only and only transforms the
|
||||
*description* the user might see in error messages (no request
|
||||
mutation).
|
||||
|
||||
The plugin is built as a factory function so the entry-point loader can
|
||||
inject :class:`PluginContext` (containing the agent's LLM, search-space
|
||||
|
|
|
|||
|
|
@ -14,7 +14,13 @@ under :mod:`app.agents.new_chat.prompts`. It replaces the monolithic
|
|||
examples/ # one ``<name>.md`` per tool with call examples
|
||||
routing/ # connector-specific routing notes (linear, slack, …)
|
||||
|
||||
Tier 3a in the OpenCode-port plan.
|
||||
The model-family dispatch step (see :func:`detect_provider_variant`)
|
||||
mirrors OpenCode's ``packages/opencode/src/session/system.ts`` — different
|
||||
model families respond best to differently-styled prompts (Claude likes
|
||||
XML/narrative, GPT-5 wants channel-aware pragmatic, Codex needs
|
||||
terse/file:line, Gemini wants formal numbered steps, etc.). LangChain's
|
||||
``dynamic_prompt`` helper supports per-call prompt swaps but ships no
|
||||
out-of-the-box family classifier, so we keep our own.
|
||||
|
||||
Backwards compatibility
|
||||
=======================
|
||||
|
|
@ -42,10 +48,11 @@ from app.db import ChatVisibility
|
|||
# When adding a new variant, also drop a matching ``providers/<variant>.md``
|
||||
# file in this package and (if appropriate) extend the regex matchers below.
|
||||
#
|
||||
# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
|
||||
# to SurfSense's "supplemental hints" architecture (each fragment is a
|
||||
# focused style nudge, NOT a full system prompt — the main prompt is
|
||||
# already assembled from base/ + tools/ + routing/).
|
||||
# Stylistic clusters: each variant is a focused style nudge, NOT a full
|
||||
# system prompt — the main prompt is already assembled from base/ +
|
||||
# tools/ + routing/. The clustering itself (which models map to which
|
||||
# style) follows OpenCode's ``system.ts`` family table; see the module
|
||||
# docstring for credits.
|
||||
ProviderVariant = str
|
||||
# Known values:
|
||||
# "anthropic" — Claude family (XML-friendly, narrative todos)
|
||||
|
|
@ -82,8 +89,8 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
|||
|
||||
Order is significant: more-specific patterns are tried first so
|
||||
``gpt-5-codex`` routes to ``"openai_codex"`` rather than
|
||||
``"openai_reasoning"`` (mirrors OpenCode's
|
||||
``packages/opencode/src/session/system.ts`` dispatch).
|
||||
``"openai_reasoning"`` — same dispatch order as OpenCode's
|
||||
``packages/opencode/src/session/system.ts``.
|
||||
"""
|
||||
if not model_name:
|
||||
return "default"
|
||||
|
|
|
|||
|
|
@ -1,14 +1,17 @@
|
|||
"""Specialized user-facing subagents for the SurfSense agent.
|
||||
|
||||
Each subagent is a :class:`deepagents.SubAgent` typed-dict spec passed to
|
||||
:class:`deepagents.SubAgentMiddleware`, which materializes them as ephemeral
|
||||
runnables invoked via the ``task`` tool.
|
||||
The :class:`deepagents.SubAgentMiddleware` already provides the
|
||||
materialization machinery (each :class:`deepagents.SubAgent` typed-dict
|
||||
spec is compiled into an ephemeral runnable invoked via the ``task``
|
||||
tool); what's specific to SurfSense is the *seeding* of those subagents
|
||||
with declarative deny rules.
|
||||
|
||||
Per-subagent permission rules are injected as a
|
||||
:class:`PermissionMiddleware` entry inside the subagent's ``middleware``
|
||||
field, mirroring opencode ``tool/task.ts`` which seeds child sessions with
|
||||
deny rules for tools the parent does not want them touching (e.g.
|
||||
``task``/``todowrite`` recursion, write tools for read-only research roles).
|
||||
field. The auto-deny pattern (e.g. forbid ``task``/``todowrite``
|
||||
recursion, block write tools for read-only research roles) is borrowed
|
||||
from OpenCode's ``packages/opencode/src/tool/task.ts``, which has
|
||||
analogous logic for restricting child sessions.
|
||||
"""
|
||||
|
||||
from .config import (
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
"""
|
||||
Thin compatibility wrapper around :mod:`app.agents.new_chat.prompts.composer`.
|
||||
|
||||
Tier 3a of the OpenCode-port plan replaced the monolithic prompt strings
|
||||
in this module with a fragment tree under ``prompts/`` and a composer
|
||||
function. This module preserves the public function surface
|
||||
(``build_surfsense_system_prompt`` / ``build_configurable_system_prompt`` /
|
||||
``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so that
|
||||
existing call sites — `chat_deepagent.py`, anonymous chat routes, and the
|
||||
configurable-prompt admin path — keep working without churn.
|
||||
The composer split the previous monolithic prompt string into a fragment
|
||||
tree under ``prompts/`` plus a model-family dispatch step (see the
|
||||
composer module docstring for credits). This module preserves the public
|
||||
function surface (``build_surfsense_system_prompt`` /
|
||||
``build_configurable_system_prompt`` /
|
||||
``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
|
||||
that existing call sites — ``chat_deepagent.py``, anonymous chat routes,
|
||||
and the configurable-prompt admin path — keep working without churn.
|
||||
|
||||
For new call sites prefer importing ``compose_system_prompt`` directly
|
||||
from :mod:`app.agents.new_chat.prompts.composer`.
|
||||
|
|
|
|||
|
|
@ -6,8 +6,9 @@ tool, :class:`ToolCallNameRepairMiddleware` rewrites the call to ``invalid``
|
|||
with the original name and a parser/validation error string. This tool's
|
||||
execution then returns that error to the model so it can self-correct.
|
||||
|
||||
Mirrors ``opencode/packages/opencode/src/tool/invalid.ts``. Tier 1.6 in
|
||||
the OpenCode-port plan.
|
||||
Ported from OpenCode's ``packages/opencode/src/tool/invalid.ts`` —
|
||||
LangChain has no equivalent fallback path; the default behavior on an
|
||||
unknown tool name is a hard ``ToolNotFoundError`` which kills the turn.
|
||||
|
||||
Critically, the :class:`ToolDefinition` for this tool is **excluded** from
|
||||
the system-prompt tool list and from ``LLMToolSelectorMiddleware`` selection
|
||||
|
|
|
|||
|
|
@ -132,12 +132,10 @@ class ToolDefinition:
|
|||
that must be in ``available_connectors`` for the tool to be enabled.
|
||||
dedup_key: Optional callable that maps a tool's ``args`` dict to a
|
||||
string signature used by :class:`DedupHITLToolCallsMiddleware`
|
||||
to drop duplicate calls. Replaces the legacy hardcoded
|
||||
``_NATIVE_HITL_TOOL_DEDUP_KEYS`` map (Tier 2.3 in the
|
||||
OpenCode-port plan).
|
||||
to drop duplicate calls within a single LLM response.
|
||||
reverse: Optional callable that, given the tool's ``(args, result)``,
|
||||
returns a ``ReverseDescriptor`` describing the inverse tool
|
||||
invocation. Consumed by the snapshot/revert pipeline (Tier 5).
|
||||
invocation. Consumed by the snapshot/revert pipeline.
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,10 @@
|
|||
"""
|
||||
OpenTelemetry instrumentation helpers for the SurfSense agent stack.
|
||||
|
||||
Tier 3b in the OpenCode-port plan.
|
||||
|
||||
Goals
|
||||
=====
|
||||
|
||||
- Provide one tiny, ergonomic API for the spans listed in the plan
|
||||
- Provide one tiny, ergonomic API for the spans we care about
|
||||
(``tool.call``, ``model.call``, ``kb.search``, ``kb.persist``,
|
||||
``compaction.run``, ``interrupt.raised``, ``permission.asked``).
|
||||
- Keep span **names** low-cardinality (``tool.call`` rather than
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
"""POST ``/api/threads/{thread_id}/revert/{action_id}``: undo an agent action.
|
||||
|
||||
Per the Tier 5 plan, the route ships **before** the UI lights up the per-message
|
||||
"Undo from here" affordance. To prevent accidental usage during the gap we
|
||||
return ``503 Service Unavailable`` until the
|
||||
``SURFSENSE_ENABLE_REVERT_ROUTE`` flag flips. Once enabled, the route runs:
|
||||
The route ships **before** the UI lights up the per-message "Undo from
|
||||
here" affordance. To prevent accidental usage during the gap we return
|
||||
``503 Service Unavailable`` until the ``SURFSENSE_ENABLE_REVERT_ROUTE``
|
||||
flag flips. Once enabled, the route runs:
|
||||
|
||||
1. Authentication via :func:`current_active_user`.
|
||||
2. Action lookup; 404 if the action does not belong to the thread.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Tests for the prompt fragment composer (Tier 3a)."""
|
||||
"""Tests for the prompt fragment composer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Tests for the OtelSpanMiddleware adapter (Tier 3b)."""
|
||||
"""Tests for the OtelSpanMiddleware adapter."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Tests for the wildcard matcher and rule evaluator (opencode evaluate.ts parity)."""
|
||||
"""Tests for the wildcard matcher and rule evaluator (parity with OpenCode evaluate.ts)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Unit tests for the SurfSense plugin entry-point loader (Tier 6)."""
|
||||
"""Unit tests for the SurfSense plugin entry-point loader."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Tests for the SurfSense OpenTelemetry shim (Tier 3b)."""
|
||||
"""Tests for the SurfSense OpenTelemetry shim."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Unit tests for the agent revert service (Tier 5.3)."""
|
||||
"""Unit tests for the agent revert service."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue