mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
After the main agent moved to its own build_main_agent_tools, nothing calls the shared registry's builders. Delete the dead functions (build_tools, build_tools_async, get_tool_by_name, get_all_tool_names, get_default_enabled_tools) plus the now-orphaned load_mcp_tools import and the stale __init__ re-exports. BUILTIN_TOOLS, ToolDefinition, and get_connector_gated_tools are retained: the catalog is still consumed for tool *metadata* (action_log revert/dedup resolvers and the /agent/tools listing). Also drop stale references to the deleted chat_deepagent.py within the agents module. Verified: full unit suite green (2431 passed, 1 skipped); lints clean.
233 lines
8.5 KiB
Python
233 lines
8.5 KiB
Python
r"""Coalesce multi-block system messages into a single text block.

Several middlewares in our deepagent stack each call
``append_to_system_message`` on the way down to the model
(``TodoListMiddleware``, ``SurfSenseFilesystemMiddleware``,
``SkillsMiddleware``, ``SubAgentMiddleware`` …). By the time the
request reaches the LLM, the system message has 5+ separate text blocks.

Anthropic enforces a hard cap of **4 ``cache_control`` blocks per
request**, and we configure 2 injection points
(``index: 0`` + ``index: -1``). With ``index: 0`` always targeting
the prepended ``request.system_message``, this middleware is the
defensive partner: it guarantees that "the system block" is *one*
content block, so LiteLLM's ``AnthropicCacheControlHook`` and any
OpenRouter→Anthropic transformer can never multiply our budget into
several breakpoints by spreading ``cache_control`` across multiple
text blocks of a multi-block system content.

Without flattening we used to see::

    OpenrouterException - {"error":{"message":"Provider returned error",
    "code":400,"metadata":{"raw":"...A maximum of 4 blocks with
    cache_control may be provided. Found 5."}}}

(Same error class documented in
https://github.com/BerriAI/litellm/issues/15696 and
https://github.com/BerriAI/litellm/issues/20485 — the litellm-side fix
in PR #15395 covers the litellm transformer but does not protect us
when the OpenRouter SaaS itself does the redistribution.)

A separate fix in :mod:`app.agents.shared.prompt_caching` (switching
the first injection point from ``role: system`` to ``index: 0``)
neutralises the *primary* cause of the same 400 — multiple
``SystemMessage``\ s injected by ``before_agent`` middlewares
(priority/tree/memory/file-intent/anonymous-doc) accumulating across
turns, each tagged with ``cache_control`` by the ``role: system``
matcher. This middleware remains useful as defence-in-depth against
the multi-block redistribution path.

Placement: innermost on the system-message-mutation chain, after every
appender (``todo``/``filesystem``/``skills``/``subagents``) and after
summarization, but before ``noop``/``retry``/``fallback`` so each retry
attempt sees a flattened payload.

Idempotent: a string-content system message is left untouched. A list
that contains anything other than plain text blocks (e.g. an image) is
also left untouched — those are rare on system messages and we'd lose
the non-text payload by joining.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from collections.abc import Awaitable, Callable
|
|
from typing import Any
|
|
|
|
from langchain.agents.middleware.types import (
|
|
AgentMiddleware,
|
|
AgentState,
|
|
ContextT,
|
|
ModelRequest,
|
|
ModelResponse,
|
|
ResponseT,
|
|
)
|
|
from langchain_core.messages import SystemMessage
|
|
|
|
# Module-level logger; the middleware below only emits DEBUG-level records.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _flatten_text_blocks(content: list[Any]) -> str | None:
|
|
"""Return joined text if every block is a plain ``{"type": "text"}``.
|
|
|
|
Returns ``None`` when the list contains anything that isn't a text
|
|
block we can safely concatenate (image, audio, file, non-standard
|
|
blocks, dicts with extra non-cache_control fields). The caller
|
|
leaves the original content untouched in that case rather than
|
|
silently dropping payload.
|
|
|
|
``cache_control`` on individual blocks is intentionally discarded —
|
|
the whole point of flattening is to let LiteLLM's
|
|
``cache_control_injection_points`` re-place a single breakpoint on
|
|
the resulting one-block system content.
|
|
"""
|
|
chunks: list[str] = []
|
|
for block in content:
|
|
if isinstance(block, str):
|
|
chunks.append(block)
|
|
continue
|
|
if not isinstance(block, dict):
|
|
return None
|
|
if block.get("type") != "text":
|
|
return None
|
|
text = block.get("text")
|
|
if not isinstance(text, str):
|
|
return None
|
|
chunks.append(text)
|
|
return "\n\n".join(chunks)
|
|
|
|
|
|
def _flattened_request(
    request: ModelRequest[ContextT],
) -> ModelRequest[ContextT] | None:
    """Return a copy of ``request`` whose system message is one text string.

    Args:
        request: The model request about to be passed to the handler.

    Returns:
        A new request (built with ``request.override``) carrying a
        replacement ``SystemMessage`` with string content, or ``None`` when
        nothing needs to change: there is no system message, its content is
        not a list, the list has at most one block, or the list holds
        something ``_flatten_text_blocks`` refuses to join.
    """
    sys_msg = request.system_message
    if sys_msg is None:
        return None

    content = sys_msg.content
    # String content, or a list of zero/one blocks, is already "one block".
    if not isinstance(content, list) or len(content) <= 1:
        return None

    flattened = _flatten_text_blocks(content)
    if flattened is None:
        # Non-text payload present; leave the message alone rather than
        # lose data by joining.
        return None

    # Copy the metadata dicts so the new message does not share mutable
    # state with the original one.
    new_sys = SystemMessage(
        content=flattened,
        additional_kwargs=dict(sys_msg.additional_kwargs),
        response_metadata=dict(sys_msg.response_metadata),
    )
    # Carry over the optional identity fields only when they were set, so an
    # unset field stays unset on the replacement message.
    if sys_msg.id is not None:
        new_sys.id = sys_msg.id
    name = getattr(sys_msg, "name", None)
    if name is not None:
        new_sys.name = name
    return request.override(system_message=new_sys)
|
|
|
|
|
|
def _diagnostic_summary(request: ModelRequest[Any]) -> str:
    """Build a one-line description of the request's cache_control shape.

    Temporary diagnostic used to trace where ``cache_control`` breakpoints
    come from when the provider rejects a request for having too many.

    The summary reports:

    * ``sys``: shape of ``request.system_message`` content (``none``,
      ``str(len=N)``, ``list(blocks=N)`` or ``other(<type>)``);
    * ``msgs``: number of messages in ``request.messages``;
    * ``sys_msgs_in_history``: how many of those are ``SystemMessage``;
    * ``multi_block_msgs``: how many have list-typed content;
    * ``pre_existing_msg_cc``: how many messages already carry
      ``cache_control`` on a content block or in ``additional_kwargs``
      (each message is counted at most once);
    * ``tools`` / ``pre_existing_tool_cc``: tool count and how many dict
      tools carry ``cache_control`` at top level or under ``"function"``;
    * ``roles``: the ``type`` of the last eight messages.

    Args:
        request: The model request to describe. It is only read.

    Returns:
        The formatted single-line summary.
    """
    sys_msg = request.system_message
    if sys_msg is None:
        sys_shape = "none"
    elif isinstance(sys_msg.content, str):
        sys_shape = f"str(len={len(sys_msg.content)})"
    elif isinstance(sys_msg.content, list):
        sys_shape = f"list(blocks={len(sys_msg.content)})"
    else:
        sys_shape = f"other({type(sys_msg.content).__name__})"

    role_hist: list[str] = []
    multi_block_msgs = 0
    msgs_with_cc = 0
    sys_msgs_in_history = 0
    for m in request.messages:
        role_hist.append(getattr(m, "type", type(m).__name__))
        if isinstance(m, SystemMessage):
            sys_msgs_in_history += 1

        c = getattr(m, "content", None)
        block_has_cc = False
        if isinstance(c, list):
            multi_block_msgs += 1
            block_has_cc = any(
                isinstance(blk, dict) and "cache_control" in blk for blk in c
            )

        # Parenthesise the fallback: a missing or ``None`` additional_kwargs
        # must become an empty dict *before* the membership test.
        extra = getattr(m, "additional_kwargs", None) or {}
        # One increment per message, however many places carry the marker.
        if block_has_cc or "cache_control" in extra:
            msgs_with_cc += 1

    tools = request.tools or []
    tools_with_cc = 0
    for t in tools:
        if not isinstance(t, dict):
            continue
        # ``"function"`` may be absent or explicitly ``None``.
        fn = t.get("function") or {}
        if "cache_control" in t or (
            isinstance(fn, dict) and "cache_control" in fn
        ):
            tools_with_cc += 1

    return (
        f"sys={sys_shape} msgs={len(request.messages)} "
        f"sys_msgs_in_history={sys_msgs_in_history} "
        f"multi_block_msgs={multi_block_msgs} pre_existing_msg_cc={msgs_with_cc} "
        f"tools={len(tools)} pre_existing_tool_cc={tools_with_cc} "
        f"roles={role_hist[-8:]}"
    )
|
|
|
|
|
|
class FlattenSystemMessageMiddleware(
    AgentMiddleware[AgentState[ResponseT], ContextT, ResponseT]
):
    """Middleware that hands the model a single-string system message.

    On every model call the request is passed through
    ``_flattened_request``. When that yields a rewritten request (a
    multi-block, text-only system message joined into one string) the
    rewritten request is forwarded to the handler; otherwise the original
    request is forwarded unchanged. The handler's result is returned as-is.

    When DEBUG logging is enabled, a diagnostic summary of the incoming
    request and a note about each collapse are logged.
    """

    def __init__(self) -> None:
        super().__init__()
        # This middleware contributes no tools of its own.
        self.tools = []

    def wrap_model_call(  # type: ignore[override]
        self,
        request: ModelRequest[ContextT],
        handler: Callable[[ModelRequest[ContextT]], ModelResponse[ResponseT]],
    ) -> Any:
        """Synchronous hook: flatten the system message, then call ``handler``."""
        debug_on = logger.isEnabledFor(logging.DEBUG)
        if debug_on:
            logger.debug("[flatten_system_diag] %s", _diagnostic_summary(request))

        collapsed = _flattened_request(request)
        if collapsed is None:
            # Nothing to flatten; pass the request through untouched.
            return handler(request)

        if debug_on:
            logger.debug(
                "[flatten_system] collapsed %d system blocks to one",
                len(request.system_message.content),  # type: ignore[arg-type, union-attr]
            )
        return handler(collapsed)

    async def awrap_model_call(  # type: ignore[override]
        self,
        request: ModelRequest[ContextT],
        handler: Callable[
            [ModelRequest[ContextT]], Awaitable[ModelResponse[ResponseT]]
        ],
    ) -> Any:
        """Asynchronous hook: flatten the system message, then await ``handler``."""
        debug_on = logger.isEnabledFor(logging.DEBUG)
        if debug_on:
            logger.debug("[flatten_system_diag] %s", _diagnostic_summary(request))

        collapsed = _flattened_request(request)
        if collapsed is None:
            # Nothing to flatten; pass the request through untouched.
            return await handler(request)

        if debug_on:
            logger.debug(
                "[flatten_system] collapsed %d system blocks to one",
                len(request.system_message.content),  # type: ignore[arg-type, union-attr]
            )
        return await handler(collapsed)
|
|
|
|
|
|
# Names exported by ``from <module> import *``: the middleware class plus the
# two underscore-prefixed helpers, which star-import would otherwise skip.
__all__ = [
    "FlattenSystemMessageMiddleware",
    "_flatten_text_blocks",
    "_flattened_request",
]
|