mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
refactor(agents): delete single-agent stack + new_chat shim package (bucket B3/B4)
With multi-agent the only live factory (B1), the single-agent stack is dead. Remove app/agents/new_chat/ entirely: chat_deepagent.py, subagents/, and all re-export shims (errors/context/llm_config/permissions/tools/middleware/...) that existed only to serve frozen single-agent code. Live code already imports the shared kernel (app.agents.shared.*) directly. Tests: delete single-agent-only suites (test_resolve_prompt_model_name, test_specialized_subagents) and the chat_deepagent source-shape contract assertion; repoint test_scoped_model_fallback to the shared middleware path. Suite green (2710 passed).
This commit is contained in:
parent
724bbd6deb
commit
14bbea0854
29 changed files with 1 additions and 2930 deletions
|
|
@ -87,7 +87,7 @@ class RateLimitError(Exception):
|
|||
def _build_agent(primary: BaseChatModel, fallback: BaseChatModel):
|
||||
from langchain.agents import create_agent
|
||||
|
||||
from app.agents.new_chat.middleware.scoped_model_fallback import (
|
||||
from app.agents.shared.middleware.scoped_model_fallback import (
|
||||
ScopedModelFallbackMiddleware,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,117 +0,0 @@
|
|||
"""Tests for ``_resolve_prompt_model_name`` in :mod:`app.agents.new_chat.chat_deepagent`.
|
||||
|
||||
The helper picks the model id fed to ``detect_provider_variant`` so the
|
||||
right ``<provider_hints>`` block lands in the system prompt. The tests
|
||||
below pin its preference order:
|
||||
|
||||
1. ``agent_config.litellm_params["base_model"]`` (Azure-correct).
|
||||
2. ``agent_config.model_name``.
|
||||
3. ``getattr(llm, "model", None)``.
|
||||
|
||||
Without (1) an Azure deployment named e.g. ``"prod-chat-001"`` would
|
||||
silently miss every provider regex.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.chat_deepagent import _resolve_prompt_model_name
|
||||
from app.agents.shared.llm_config import AgentConfig
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def _make_cfg(**overrides) -> AgentConfig:
|
||||
"""Build an ``AgentConfig`` with sensible defaults for the helper test."""
|
||||
defaults = {
|
||||
"provider": "OPENAI",
|
||||
"model_name": "x",
|
||||
"api_key": "k",
|
||||
}
|
||||
return AgentConfig(**{**defaults, **overrides})
|
||||
|
||||
|
||||
class _FakeLLM:
|
||||
"""Stand-in for a ``ChatLiteLLM`` / ``ChatLiteLLMRouter`` instance.
|
||||
|
||||
The resolver only reads the ``.model`` attribute via ``getattr``,
|
||||
matching the established idiom in ``knowledge_search.py`` /
|
||||
``stream_new_chat.py`` / ``document_summarizer.py``.
|
||||
"""
|
||||
|
||||
def __init__(self, model: str | None) -> None:
|
||||
self.model = model
|
||||
|
||||
|
||||
def test_prefers_litellm_params_base_model_over_deployment_name() -> None:
|
||||
"""Azure deployment slug must NOT shadow the underlying model family.
|
||||
|
||||
This is the failure mode the helper exists to prevent: a deployment
|
||||
named ``"azure/prod-chat-001"`` would not match any provider regex
|
||||
on its own, but the family ``"gpt-4o"`` lives in
|
||||
``litellm_params["base_model"]`` and routes to ``openai_classic``.
|
||||
"""
|
||||
cfg = _make_cfg(
|
||||
model_name="azure/prod-chat-001",
|
||||
litellm_params={"base_model": "gpt-4o"},
|
||||
)
|
||||
assert _resolve_prompt_model_name(cfg, _FakeLLM("azure/prod-chat-001")) == "gpt-4o"
|
||||
|
||||
|
||||
def test_falls_back_to_model_name_when_litellm_params_is_none() -> None:
|
||||
cfg = _make_cfg(
|
||||
model_name="anthropic/claude-3-5-sonnet",
|
||||
litellm_params=None,
|
||||
)
|
||||
got = _resolve_prompt_model_name(cfg, _FakeLLM("anthropic/claude-3-5-sonnet"))
|
||||
assert got == "anthropic/claude-3-5-sonnet"
|
||||
|
||||
|
||||
def test_handles_litellm_params_without_base_model_key() -> None:
|
||||
cfg = _make_cfg(
|
||||
model_name="openai/gpt-4o",
|
||||
litellm_params={"temperature": 0.5},
|
||||
)
|
||||
assert _resolve_prompt_model_name(cfg, _FakeLLM("openai/gpt-4o")) == "openai/gpt-4o"
|
||||
|
||||
|
||||
def test_ignores_blank_base_model() -> None:
|
||||
"""Whitespace-only ``base_model`` must not shadow ``model_name``."""
|
||||
cfg = _make_cfg(
|
||||
model_name="openai/gpt-4o",
|
||||
litellm_params={"base_model": " "},
|
||||
)
|
||||
assert _resolve_prompt_model_name(cfg, _FakeLLM("openai/gpt-4o")) == "openai/gpt-4o"
|
||||
|
||||
|
||||
def test_ignores_non_string_base_model() -> None:
|
||||
"""Defensive: a non-string ``base_model`` should not crash the resolver."""
|
||||
cfg = _make_cfg(
|
||||
model_name="openai/gpt-4o",
|
||||
litellm_params={"base_model": 42},
|
||||
)
|
||||
assert _resolve_prompt_model_name(cfg, _FakeLLM("openai/gpt-4o")) == "openai/gpt-4o"
|
||||
|
||||
|
||||
def test_falls_back_to_llm_model_when_no_agent_config() -> None:
|
||||
"""No ``agent_config`` -> use ``llm.model`` directly. Defensive path
|
||||
for direct callers; production callers always supply a config."""
|
||||
assert (
|
||||
_resolve_prompt_model_name(None, _FakeLLM("openai/gpt-4o-mini"))
|
||||
== "openai/gpt-4o-mini"
|
||||
)
|
||||
|
||||
|
||||
def test_returns_none_when_nothing_available() -> None:
|
||||
"""``compose_system_prompt`` treats ``None`` as the ``"default"``
|
||||
variant and emits no provider block."""
|
||||
assert _resolve_prompt_model_name(None, _FakeLLM(None)) is None
|
||||
|
||||
|
||||
def test_auto_mode_resolves_to_auto_string() -> None:
|
||||
"""Auto mode -> ``"auto"``. ``detect_provider_variant("auto")``
|
||||
returns ``"default"``, which is correct: the child model isn't
|
||||
known until the LiteLLM Router dispatches."""
|
||||
cfg = AgentConfig.from_auto_mode()
|
||||
assert _resolve_prompt_model_name(cfg, _FakeLLM("auto")) == "auto"
|
||||
|
|
@ -1,337 +0,0 @@
|
|||
"""Tests for the specialized subagents (explore / report_writer / connector_negotiator)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from langchain_core.tools import tool
|
||||
|
||||
from app.agents.shared.middleware.permission import PermissionMiddleware
|
||||
from app.agents.new_chat.subagents import (
|
||||
build_connector_negotiator_subagent,
|
||||
build_explore_subagent,
|
||||
build_report_writer_subagent,
|
||||
build_specialized_subagents,
|
||||
)
|
||||
from app.agents.new_chat.subagents.config import (
|
||||
EXPLORE_READ_TOOLS,
|
||||
REPORT_WRITER_TOOLS,
|
||||
WRITE_TOOL_DENY_PATTERNS,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake tools used to verify filtering & permission behavior
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@tool
|
||||
def web_search(query: str) -> str:
|
||||
"""Search the public web."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def scrape_webpage(url: str) -> str:
|
||||
"""Scrape a single webpage."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def read_file(path: str) -> str:
|
||||
"""Read a file."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def ls_tree(path: str) -> str:
|
||||
"""List a tree."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def grep(pattern: str) -> str:
|
||||
"""Grep."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def update_memory(content: str) -> str:
|
||||
"""Update the user's memory."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def edit_file(path: str, old: str, new: str) -> str:
|
||||
"""Edit a file."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def linear_create_issue(title: str) -> str:
|
||||
"""Create a Linear issue."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def slack_send_message(channel: str, text: str) -> str:
|
||||
"""Send a Slack message."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def get_connected_accounts() -> str:
|
||||
"""List connected accounts."""
|
||||
return ""
|
||||
|
||||
|
||||
@tool
|
||||
def generate_report(topic: str) -> str:
|
||||
"""Generate a report artifact."""
|
||||
return ""
|
||||
|
||||
|
||||
ALL_TOOLS = [
|
||||
web_search,
|
||||
scrape_webpage,
|
||||
read_file,
|
||||
ls_tree,
|
||||
grep,
|
||||
update_memory,
|
||||
edit_file,
|
||||
linear_create_issue,
|
||||
slack_send_message,
|
||||
get_connected_accounts,
|
||||
generate_report,
|
||||
]
|
||||
|
||||
|
||||
class TestExploreSubagent:
|
||||
def test_only_read_tools_are_exposed(self) -> None:
|
||||
spec = build_explore_subagent(tools=ALL_TOOLS)
|
||||
names = {t.name for t in spec["tools"]} # type: ignore[index]
|
||||
assert names == EXPLORE_READ_TOOLS & {t.name for t in ALL_TOOLS}
|
||||
assert "update_memory" not in names
|
||||
assert "linear_create_issue" not in names
|
||||
assert "edit_file" not in names
|
||||
|
||||
def test_includes_permission_middleware_with_deny_rules(self) -> None:
|
||||
spec = build_explore_subagent(tools=ALL_TOOLS)
|
||||
permission_mws = [
|
||||
m
|
||||
for m in spec["middleware"]
|
||||
if isinstance(m, PermissionMiddleware) # type: ignore[index]
|
||||
]
|
||||
assert len(permission_mws) == 1
|
||||
ruleset = permission_mws[0]._static_rulesets[0]
|
||||
assert ruleset.origin == "subagent_explore"
|
||||
deny_patterns = {r.permission for r in ruleset.rules if r.action == "deny"}
|
||||
assert "update_memory" in deny_patterns
|
||||
assert "edit_file" in deny_patterns
|
||||
assert "*create*" in deny_patterns
|
||||
assert "*send*" in deny_patterns
|
||||
|
||||
def test_skills_inherits_default_sources(self) -> None:
|
||||
spec = build_explore_subagent(tools=ALL_TOOLS)
|
||||
assert spec["skills"] == ["/skills/builtin/", "/skills/space/"] # type: ignore[index]
|
||||
|
||||
def test_name_and_description_match_contract(self) -> None:
|
||||
spec = build_explore_subagent(tools=ALL_TOOLS)
|
||||
assert spec["name"] == "explore"
|
||||
assert "read-only" in spec["description"].lower()
|
||||
|
||||
def test_includes_dedup_and_patch_middleware(self) -> None:
|
||||
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
|
||||
|
||||
from app.agents.shared.middleware import DedupHITLToolCallsMiddleware
|
||||
|
||||
spec = build_explore_subagent(tools=ALL_TOOLS)
|
||||
types = {type(m) for m in spec["middleware"]} # type: ignore[index]
|
||||
assert PatchToolCallsMiddleware in types
|
||||
assert DedupHITLToolCallsMiddleware in types
|
||||
|
||||
|
||||
class TestReportWriterSubagent:
|
||||
def test_exposes_only_report_writing_tools(self) -> None:
|
||||
spec = build_report_writer_subagent(tools=ALL_TOOLS)
|
||||
names = {t.name for t in spec["tools"]} # type: ignore[index]
|
||||
assert names == REPORT_WRITER_TOOLS & {t.name for t in ALL_TOOLS}
|
||||
assert "generate_report" in names
|
||||
assert "read_file" in names
|
||||
|
||||
def test_deny_rules_block_writes_but_allow_generate_report(self) -> None:
|
||||
spec = build_report_writer_subagent(tools=ALL_TOOLS)
|
||||
permission_mws = [
|
||||
m
|
||||
for m in spec["middleware"]
|
||||
if isinstance(m, PermissionMiddleware) # type: ignore[index]
|
||||
]
|
||||
ruleset = permission_mws[0]._static_rulesets[0]
|
||||
deny_patterns = {r.permission for r in ruleset.rules if r.action == "deny"}
|
||||
assert "update_memory" in deny_patterns
|
||||
# generate_report MUST not be denied — it's the whole point of the subagent.
|
||||
assert "generate_report" not in deny_patterns
|
||||
# No deny pattern should match `generate_report` either.
|
||||
assert all(
|
||||
not _wildcard_matches(pattern, "generate_report")
|
||||
for pattern in deny_patterns
|
||||
)
|
||||
|
||||
|
||||
class TestConnectorNegotiatorSubagent:
|
||||
def test_inherits_all_parent_tools(self) -> None:
|
||||
spec = build_connector_negotiator_subagent(tools=ALL_TOOLS)
|
||||
names = {t.name for t in spec["tools"]} # type: ignore[index]
|
||||
# Every parent tool is inherited; the deny ruleset enforces behavior
|
||||
# at execution time instead of trimming the tool list.
|
||||
assert names == {t.name for t in ALL_TOOLS}
|
||||
|
||||
def test_get_connected_accounts_is_present(self) -> None:
|
||||
spec = build_connector_negotiator_subagent(tools=ALL_TOOLS)
|
||||
names = {t.name for t in spec["tools"]} # type: ignore[index]
|
||||
assert "get_connected_accounts" in names
|
||||
|
||||
def test_deny_ruleset_blocks_mutating_connector_tools(self) -> None:
|
||||
spec = build_connector_negotiator_subagent(tools=ALL_TOOLS)
|
||||
permission_mws = [
|
||||
m
|
||||
for m in spec["middleware"]
|
||||
if isinstance(m, PermissionMiddleware) # type: ignore[index]
|
||||
]
|
||||
ruleset = permission_mws[0]._static_rulesets[0]
|
||||
deny_patterns = {r.permission for r in ruleset.rules if r.action == "deny"}
|
||||
# `linear_create_issue` matches the `*_create` deny pattern.
|
||||
assert any(_wildcard_matches(p, "linear_create_issue") for p in deny_patterns)
|
||||
assert any(_wildcard_matches(p, "slack_send_message") for p in deny_patterns)
|
||||
|
||||
|
||||
class TestBuildSpecializedSubagents:
|
||||
def test_returns_five_specs(self) -> None:
|
||||
specs = build_specialized_subagents(tools=ALL_TOOLS)
|
||||
names = [s["name"] for s in specs] # type: ignore[index]
|
||||
assert names == [
|
||||
"explore",
|
||||
"report_writer",
|
||||
"linear_specialist",
|
||||
"slack_specialist",
|
||||
"connector_negotiator",
|
||||
]
|
||||
|
||||
def test_all_specs_have_unique_names(self) -> None:
|
||||
specs = build_specialized_subagents(tools=ALL_TOOLS)
|
||||
names = [s["name"] for s in specs] # type: ignore[index]
|
||||
assert len(set(names)) == len(names)
|
||||
|
||||
def test_extra_middleware_is_prepended_to_each_spec(self) -> None:
|
||||
"""Sentinel middleware passed via ``extra_middleware`` must appear
|
||||
in each subagent's ``middleware`` list, before the local rules.
|
||||
|
||||
This guards against the regression where specialized subagents
|
||||
promised filesystem tools (``read_file``, ``ls``, ``grep``) in
|
||||
their system prompts but had no filesystem middleware mounted.
|
||||
"""
|
||||
|
||||
class _Sentinel:
|
||||
pass
|
||||
|
||||
sentinel = _Sentinel()
|
||||
specs = build_specialized_subagents(
|
||||
tools=ALL_TOOLS, extra_middleware=[sentinel]
|
||||
)
|
||||
for spec in specs:
|
||||
mws = spec["middleware"] # type: ignore[index]
|
||||
assert sentinel in mws
|
||||
# The sentinel must appear *before* the permission middleware
|
||||
# (subagent-local rules), preserving the documented composition
|
||||
# order: extra → custom → patch → dedup.
|
||||
sentinel_idx = mws.index(sentinel)
|
||||
perm_idx = next(
|
||||
(i for i, m in enumerate(mws) if isinstance(m, PermissionMiddleware)),
|
||||
None,
|
||||
)
|
||||
assert perm_idx is not None
|
||||
assert sentinel_idx < perm_idx
|
||||
|
||||
|
||||
class TestFilterToolsWarningSuppression:
|
||||
"""Names provided by middleware (read_file, ls, grep, …) must not
|
||||
trigger the spurious "missing" warning in :func:`_filter_tools`."""
|
||||
|
||||
def test_middleware_provided_names_are_silent(self, caplog) -> None:
|
||||
import logging
|
||||
|
||||
from app.agents.new_chat.subagents.config import _filter_tools
|
||||
|
||||
with caplog.at_level(
|
||||
logging.INFO, logger="app.agents.new_chat.subagents.config"
|
||||
):
|
||||
# Allowed set asks for two registry tools (one present, one
|
||||
# not) plus a bunch of middleware-provided names.
|
||||
_filter_tools(
|
||||
[web_search],
|
||||
allowed_names={
|
||||
"web_search",
|
||||
"scrape_webpage", # legitimately missing → should warn
|
||||
"read_file", # mw-provided → suppressed
|
||||
"ls",
|
||||
"grep",
|
||||
"glob",
|
||||
"write_todos",
|
||||
},
|
||||
)
|
||||
|
||||
warnings = [r.message for r in caplog.records if r.levelno >= logging.INFO]
|
||||
# Exactly one warning, and it should mention scrape_webpage but not
|
||||
# any middleware-provided name. Inspect the rendered "missing"
|
||||
# list (between the brackets) so we don't false-match substrings
|
||||
# like ``ls`` inside ``available``.
|
||||
assert len(warnings) == 1, warnings
|
||||
msg = warnings[0]
|
||||
assert "scrape_webpage" in msg
|
||||
bracket_section = msg.split("missing: ", 1)[1]
|
||||
for noisy in ("read_file", "ls", "grep", "glob", "write_todos"):
|
||||
assert f"'{noisy}'" not in bracket_section, msg
|
||||
|
||||
|
||||
class TestDenyPatternsCoverage:
|
||||
def test_deny_patterns_cover_canonical_write_tools(self) -> None:
|
||||
canonical_writes = [
|
||||
"update_memory",
|
||||
"edit_file",
|
||||
"write_file",
|
||||
"move_file",
|
||||
"mkdir",
|
||||
"linear_create_issue",
|
||||
"linear_update_issue",
|
||||
"linear_delete_issue",
|
||||
"slack_send_message",
|
||||
"create_index",
|
||||
"update_account",
|
||||
"delete_record",
|
||||
"send_email",
|
||||
]
|
||||
for tool_name in canonical_writes:
|
||||
assert any(
|
||||
_wildcard_matches(pattern, tool_name)
|
||||
for pattern in WRITE_TOOL_DENY_PATTERNS
|
||||
), f"no deny pattern matches {tool_name!r}"
|
||||
|
||||
def test_deny_patterns_do_not_match_safe_read_tools(self) -> None:
|
||||
canonical_reads = [
|
||||
"read_file",
|
||||
"ls_tree",
|
||||
"grep",
|
||||
"web_search",
|
||||
"scrape_webpage",
|
||||
"get_connected_accounts",
|
||||
"generate_report",
|
||||
]
|
||||
for tool_name in canonical_reads:
|
||||
assert not any(
|
||||
_wildcard_matches(pattern, tool_name)
|
||||
for pattern in WRITE_TOOL_DENY_PATTERNS
|
||||
), f"deny pattern incorrectly matches read tool {tool_name!r}"
|
||||
|
||||
|
||||
def _wildcard_matches(pattern: str, value: str) -> bool:
|
||||
"""Helper using the same matcher the rule evaluator does."""
|
||||
from app.agents.shared.permissions import wildcard_match
|
||||
|
||||
return wildcard_match(value, pattern)
|
||||
|
|
@ -436,39 +436,3 @@ def test_turn_status_sse_contract_exists():
|
|||
assert 'type: "data-turn-status"' in state_source
|
||||
assert 'case "data-turn-status":' in pipeline_source
|
||||
assert "end_turn(str(chat_id))" in stream_source
|
||||
|
||||
|
||||
def test_chat_deepagent_forwards_resolved_model_name_to_both_builders():
|
||||
"""Regression guard: both system-prompt builders in chat_deepagent.py
|
||||
must receive ``model_name=_resolve_prompt_model_name(...)`` so the
|
||||
provider-variant dispatch can render the right ``<provider_hints>``
|
||||
block. Without this the prompt silently falls back to the empty
|
||||
``"default"`` variant — the original bug being fixed.
|
||||
|
||||
This test mirrors :func:`test_stream_error_emission_keeps_machine_error_codes`
|
||||
in style: it inspects module source text + a regex to enforce the
|
||||
call-site shape, not just the wrapper layer (the wrappers already
|
||||
forward ``model_name`` correctly, so testing them would not catch
|
||||
the actual missed plumbing).
|
||||
"""
|
||||
import app.agents.new_chat.chat_deepagent as chat_deepagent_module
|
||||
|
||||
source = inspect.getsource(chat_deepagent_module)
|
||||
|
||||
# Helper itself must be defined.
|
||||
assert "def _resolve_prompt_model_name(" in source
|
||||
|
||||
# Both builder calls must forward the resolved model name. Match
|
||||
# across newlines + whitespace because the kwargs are split over
|
||||
# multiple lines.
|
||||
pattern = re.compile(
|
||||
r"build_(?:surfsense|configurable)_system_prompt\([^)]*"
|
||||
r"model_name=_resolve_prompt_model_name\(",
|
||||
re.DOTALL,
|
||||
)
|
||||
matches = pattern.findall(source)
|
||||
assert len(matches) == 2, (
|
||||
"Expected both system-prompt builder call sites to forward "
|
||||
"`model_name=_resolve_prompt_model_name(...)`, found "
|
||||
f"{len(matches)}"
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue