Merge upstream/dev into feature/mcp-migration

This commit is contained in:
CREDO23 2026-04-22 19:53:26 +02:00
commit 4915675f45
54 changed files with 2050 additions and 359 deletions

View file

@ -24,7 +24,6 @@ from deepagents.backends import StateBackend
from deepagents.graph import BASE_AGENT_PROMPT
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT
from deepagents.middleware.summarization import create_summarization_middleware
from langchain.agents import create_agent
from langchain.agents.middleware import TodoListMiddleware
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
@ -41,6 +40,9 @@ from app.agents.new_chat.middleware import (
MemoryInjectionMiddleware,
SurfSenseFilesystemMiddleware,
)
from app.agents.new_chat.middleware.safe_summarization import (
create_safe_summarization_middleware,
)
from app.agents.new_chat.system_prompt import (
build_configurable_system_prompt,
build_surfsense_system_prompt,
@ -347,7 +349,7 @@ async def create_surfsense_deep_agent(
created_by_id=user_id,
thread_id=thread_id,
),
create_summarization_middleware(llm, StateBackend),
create_safe_summarization_middleware(llm, StateBackend),
PatchToolCallsMiddleware(),
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
]
@ -377,7 +379,7 @@ async def create_surfsense_deep_agent(
thread_id=thread_id,
),
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
create_summarization_middleware(llm, StateBackend),
create_safe_summarization_middleware(llm, StateBackend),
PatchToolCallsMiddleware(),
DedupHITLToolCallsMiddleware(agent_tools=tools),
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),

View file

@ -0,0 +1,123 @@
"""Safe wrapper around deepagents' SummarizationMiddleware.
Upstream issue
--------------
`deepagents.middleware.summarization.SummarizationMiddleware._aoffload_to_backend`
(and its sync counterpart) call
``get_buffer_string(filtered_messages)`` before writing the evicted history
to the backend file. In recent ``langchain-core`` versions, ``get_buffer_string``
accesses ``m.text``, which iterates ``self.content``; this raises
``TypeError: 'NoneType' object is not iterable`` whenever an ``AIMessage``
has ``content=None`` (common when a model returns *only* tool_calls, seen
frequently with Azure OpenAI ``gpt-5.x`` responses streamed through
LiteLLM).
The exception aborts the whole agent turn, so the user just sees "Error during
chat" with no assistant response.
Fix
---
We subclass ``SummarizationMiddleware`` and override
``_filter_summary_messages`` — the only helper that feeds messages into
``get_buffer_string`` — so that every message whose ``content`` is ``None``
is replaced by a *copy* with ``content=""``. The originals flowing through
the rest of the agent state are untouched.
We also expose a drop-in ``create_safe_summarization_middleware`` factory
that mirrors ``deepagents.middleware.summarization.create_summarization_middleware``
but instantiates our safe subclass.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from deepagents.middleware.summarization import (
SummarizationMiddleware,
compute_summarization_defaults,
)
if TYPE_CHECKING:
from deepagents.backends.protocol import BACKEND_TYPES
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AnyMessage
logger = logging.getLogger(__name__)
def _sanitize_message_content(msg: AnyMessage) -> AnyMessage:
    """Give back a message whose ``content`` is never ``None``.

    ``get_buffer_string`` reads ``m.text``, which iterates ``self.content``.
    A message that carries only tool calls may have ``content=None``, and
    that iteration then fails. Such messages are swapped for a copy whose
    ``content`` is the empty string, so text-only consumers see an empty
    body.

    The input object itself is never modified. A copy is produced through
    ``model_copy`` when the object offers it; otherwise a shallow copy is
    made and its ``content`` attribute is reassigned.

    Args:
        msg: The message to inspect.

    Returns:
        ``msg`` unchanged when its ``content`` is not ``None`` (or when it
        has no ``content`` attribute, or when the fallback copy cannot be
        patched); otherwise a copy with ``content=""``.
    """
    current = getattr(msg, "content", "not-missing")
    if current is not None:
        # Either there is a real body or there is no ``content`` attribute
        # at all; in both cases there is nothing to repair.
        return msg

    try:
        return msg.model_copy(update={"content": ""})
    except AttributeError:
        # No usable ``model_copy``: patch a shallow copy instead.
        import copy

        clone = copy.copy(msg)
        try:
            clone.content = ""
        except Exception:  # pragma: no cover - defensive
            logger.debug(
                "Could not sanitize content=None on message of type %s",
                type(msg).__name__,
            )
            return msg
        return clone
class SafeSummarizationMiddleware(SummarizationMiddleware):
    """``SummarizationMiddleware`` variant that copes with ``content=None``.

    The sole override is ``_filter_summary_messages``. According to the
    module notes, that helper runs right before ``get_buffer_string`` in
    both the sync and the async offload paths, so normalising its output
    covers both without duplicating the upstream offload implementations.
    """

    def _filter_summary_messages(self, messages: list[AnyMessage]) -> list[AnyMessage]:
        """Apply the upstream filter, then replace ``None`` content with ``""``."""
        kept = super()._filter_summary_messages(messages)
        return list(map(_sanitize_message_content, kept))
def create_safe_summarization_middleware(
    model: BaseChatModel,
    backend: BACKEND_TYPES,
) -> SafeSummarizationMiddleware:
    """Build a ``SafeSummarizationMiddleware`` with deepagents' defaults.

    Intended as a drop-in replacement for ``create_summarization_middleware``:
    the trigger, keep and argument-truncation settings come from
    ``compute_summarization_defaults(model)``, while the returned object is
    the safe subclass that avoids the ``content=None`` crash in
    ``get_buffer_string``.

    Args:
        model: Chat model handed to the middleware and used to compute
            the defaults.
        backend: Backend passed through to the middleware unchanged.

    Returns:
        A configured ``SafeSummarizationMiddleware`` instance.
    """
    computed = compute_summarization_defaults(model)
    trigger = computed["trigger"]
    keep = computed["keep"]
    truncate_args = computed["truncate_args_settings"]
    return SafeSummarizationMiddleware(
        model=model,
        backend=backend,
        trigger=trigger,
        keep=keep,
        trim_tokens_to_summarize=None,
        truncate_args_settings=truncate_args,
    )
# Public names exported by this module; the content-sanitizing helper is
# deliberately left private.
__all__ = [
    "SafeSummarizationMiddleware",
    "create_safe_summarization_middleware",
]

View file

@ -114,8 +114,19 @@ def _surfsense_error_handler(request: Request, exc: SurfSenseError) -> JSONRespo
def _http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
"""Wrap FastAPI/Starlette HTTPExceptions into the standard envelope."""
"""Wrap FastAPI/Starlette HTTPExceptions into the standard envelope.
5xx sanitization policy:
- 500 responses are sanitized (replaced with ``GENERIC_5XX_MESSAGE``) because
they usually wrap raw internal errors and may leak sensitive info.
- Other 5xx statuses (501, 502, 503, 504, ...) are raised explicitly by
route code to communicate a specific, user-safe operational state
(e.g. 503 "Page purchases are temporarily unavailable."). Those details
are preserved so the frontend can render them, but the error is still
logged server-side.
"""
rid = _get_request_id(request)
should_sanitize = exc.status_code == 500
# Structured dict details (e.g. {"code": "CAPTCHA_REQUIRED", "message": "..."})
# are preserved so the frontend can parse them.
@ -130,6 +141,7 @@ def _http_exception_handler(request: Request, exc: HTTPException) -> JSONRespons
exc.status_code,
message,
)
if should_sanitize:
message = GENERIC_5XX_MESSAGE
err_code = "INTERNAL_ERROR"
body = {
@ -158,6 +170,7 @@ def _http_exception_handler(request: Request, exc: HTTPException) -> JSONRespons
exc.status_code,
detail,
)
if should_sanitize:
detail = GENERIC_5XX_MESSAGE
code = _status_to_code(exc.status_code, detail)
return _build_error_response(exc.status_code, detail, code=code, request_id=rid)

View file

@ -133,6 +133,44 @@ PROVIDER_MAP = {
}
# Default ``api_base`` per LiteLLM provider prefix. Used as a safety net when
# a global LLM config does *not* specify ``api_base``: without this, LiteLLM
# happily picks up provider-agnostic env vars (e.g. ``AZURE_API_BASE``,
# ``OPENAI_API_BASE``) and routes, say, an ``openrouter/anthropic/claude-3-haiku``
# request to an Azure endpoint, which then 404s with ``Resource not found``.
# Only providers with a well-known, stable public base URL are listed here —
# self-hosted / BYO-endpoint providers (ollama, custom, bedrock, vertex_ai,
# huggingface, databricks, cloudflare, replicate) are intentionally omitted
# so their existing config-driven behaviour is preserved.
# Keys are the provider prefix used at the front of the LiteLLM model string
# (``<prefix>/<model_name>``); values are the default ``api_base`` URL.
# This table is the last resort: it is consulted only when the config has no
# ``api_base`` and ``PROVIDER_KEY_DEFAULT_API_BASE`` has no matching entry.
PROVIDER_DEFAULT_API_BASE: dict[str, str] = {
    "openrouter": "https://openrouter.ai/api/v1",
    "groq": "https://api.groq.com/openai/v1",
    "mistral": "https://api.mistral.ai/v1",
    "perplexity": "https://api.perplexity.ai",
    "xai": "https://api.x.ai/v1",
    "cerebras": "https://api.cerebras.ai/v1",
    "deepinfra": "https://api.deepinfra.com/v1/openai",
    "fireworks_ai": "https://api.fireworks.ai/inference/v1",
    "together_ai": "https://api.together.xyz/v1",
    "anyscale": "https://api.endpoints.anyscale.com/v1",
    "cometapi": "https://api.cometapi.com/v1",
    "sambanova": "https://api.sambanova.ai/v1",
}
# Canonical provider → base URL when a config uses a generic ``openai``-style
# prefix but the ``provider`` field tells us which API it really is
# (e.g. DeepSeek/Alibaba/Moonshot/Zhipu/MiniMax all use ``openai`` compat but
# each has its own base URL).
# Keys are the upper-cased ``provider`` value from a config; values are the
# default ``api_base`` URL. Checked after an explicit ``api_base`` in the
# config and before ``PROVIDER_DEFAULT_API_BASE``.
PROVIDER_KEY_DEFAULT_API_BASE: dict[str, str] = {
    "DEEPSEEK": "https://api.deepseek.com/v1",
    "ALIBABA_QWEN": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
    "MOONSHOT": "https://api.moonshot.ai/v1",
    "ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
    "MINIMAX": "https://api.minimax.io/v1",
}
class LLMRouterService:
"""
Singleton service for managing LiteLLM Router.
@ -224,6 +262,16 @@ class LLMRouterService:
# hits ContextWindowExceededError.
full_model_list, ctx_fallbacks = cls._build_context_fallback_groups(model_list)
# Build a general-purpose fallback list so NotFound/timeout/rate-limit
# style failures on one deployment don't bubble up as hard errors —
# the router retries with a sibling deployment in ``auto-large``.
# ``auto-large`` is the large-context subset of ``auto``. The fallback is
# only registered when context-window fallback groups were built
# (``ctx_fallbacks`` is non-empty); otherwise no general fallback is set.
fallbacks: list[dict[str, list[str]]] | None = None
if ctx_fallbacks:
fallbacks = [{"auto": ["auto-large"]}]
try:
router_kwargs: dict[str, Any] = {
"model_list": full_model_list,
@ -237,15 +285,18 @@ class LLMRouterService:
}
if ctx_fallbacks:
router_kwargs["context_window_fallbacks"] = ctx_fallbacks
if fallbacks:
router_kwargs["fallbacks"] = fallbacks
instance._router = Router(**router_kwargs)
instance._initialized = True
logger.info(
"LLM Router initialized with %d deployments, "
"strategy: %s, context_window_fallbacks: %s",
"strategy: %s, context_window_fallbacks: %s, fallbacks: %s",
len(model_list),
final_settings.get("routing_strategy"),
ctx_fallbacks or "none",
fallbacks or "none",
)
except Exception as e:
logger.error(f"Failed to initialize LLM Router: {e}")
@ -348,10 +399,11 @@ class LLMRouterService:
return None
# Build model string
provider = config.get("provider", "").upper()
if config.get("custom_provider"):
model_string = f"{config['custom_provider']}/{config['model_name']}"
provider_prefix = config["custom_provider"]
model_string = f"{provider_prefix}/{config['model_name']}"
else:
provider = config.get("provider", "").upper()
provider_prefix = PROVIDER_MAP.get(provider, provider.lower())
model_string = f"{provider_prefix}/{config['model_name']}"
@ -361,9 +413,19 @@ class LLMRouterService:
"api_key": config.get("api_key"),
}
# Add optional api_base
if config.get("api_base"):
litellm_params["api_base"] = config["api_base"]
# Resolve ``api_base``. Config value wins; otherwise apply a
# provider-aware default so the deployment does not silently
# inherit unrelated env vars (e.g. ``AZURE_API_BASE``) and route
# requests to the wrong endpoint. See the comment above
# ``PROVIDER_DEFAULT_API_BASE`` for the motivating bug (OpenRouter models
# 404-ing against an Azure endpoint).
api_base = config.get("api_base")
if not api_base:
api_base = PROVIDER_KEY_DEFAULT_API_BASE.get(provider)
if not api_base:
api_base = PROVIDER_DEFAULT_API_BASE.get(provider_prefix)
if api_base:
litellm_params["api_base"] = api_base
# Add any additional litellm parameters
if config.get("litellm_params"):

View file

@ -1,3 +1,4 @@
import asyncio
import logging
import litellm
@ -32,6 +33,39 @@ litellm.callbacks = [token_tracker]
logger = logging.getLogger(__name__)
# Providers that require an interactive OAuth / device-flow login before
# issuing any completion. LiteLLM implements these with blocking sync polling
# (requests + time.sleep), which would freeze the FastAPI event loop if
# invoked from validation. They are never usable from a headless backend,
# so we reject them at the edge.
_INTERACTIVE_AUTH_PROVIDERS: frozenset[str] = frozenset(
{
"github_copilot",
"github-copilot",
"githubcopilot",
"copilot",
}
)
# Hard upper bound for a single validation call. Must exceed the ChatLiteLLM
# request timeout (30s) by a small margin so a well-behaved provider never
# trips the watchdog, while any pathological/blocking provider is killed.
_VALIDATION_TIMEOUT_SECONDS: float = 35.0
def _is_interactive_auth_provider(
provider: str | None, custom_provider: str | None
) -> bool:
"""Return True if the given provider triggers interactive OAuth in LiteLLM."""
for raw in (custom_provider, provider):
if not raw:
continue
normalized = raw.strip().lower().replace(" ", "_")
if normalized in _INTERACTIVE_AUTH_PROVIDERS:
return True
return False
class LLMRole:
AGENT = "agent" # For agent/chat operations
DOCUMENT_SUMMARY = "document_summary" # For document summarization
@ -93,6 +127,25 @@ async def validate_llm_config(
- is_valid: True if config works, False otherwise
- error_message: Empty string if valid, error description if invalid
"""
# Reject providers that require interactive OAuth/device-flow auth.
# LiteLLM's github_copilot provider (and similar) uses a blocking sync
# Authenticator that polls GitHub for up to several minutes and prints a
# device code to stdout. Running it on the FastAPI event loop will freeze
# the entire backend, so we refuse them up front.
if _is_interactive_auth_provider(provider, custom_provider):
msg = (
"Provider requires interactive OAuth/device-flow authentication "
"(e.g. github_copilot) and cannot be used in a hosted backend. "
"Please choose a provider that authenticates via API key."
)
logger.warning(
"Rejected LLM config validation for interactive-auth provider "
"(provider=%r, custom_provider=%r)",
provider,
custom_provider,
)
return False, msg
try:
# Build the model string for litellm
if custom_provider:
@ -153,9 +206,30 @@ async def validate_llm_config(
llm = SanitizedChatLiteLLM(**litellm_kwargs)
# Make a simple test call
# Run the test call in a worker thread with a hard timeout. Some
# LiteLLM providers have synchronous blocking code paths (e.g. OAuth
# authenticators that call time.sleep and requests.post) that would
# otherwise freeze the asyncio event loop. Offloading to a thread and
# bounding the wait keeps the server responsive even if a provider
# misbehaves.
test_message = HumanMessage(content="Hello")
response = await llm.ainvoke([test_message])
try:
response = await asyncio.wait_for(
asyncio.to_thread(llm.invoke, [test_message]),
timeout=_VALIDATION_TIMEOUT_SECONDS,
)
except TimeoutError:
logger.warning(
"LLM config validation timed out after %ss for model: %s",
_VALIDATION_TIMEOUT_SECONDS,
model_string,
)
return (
False,
f"Validation timed out after {int(_VALIDATION_TIMEOUT_SECONDS)}s. "
"The provider is unreachable or requires interactive "
"authentication that is not supported by the backend.",
)
# If we got here without exception, the config is valid
if response and response.content:

View file

@ -1,6 +1,6 @@
[project]
name = "surf-new-backend"
version = "0.0.16"
version = "0.0.19"
description = "SurfSense Backend"
requires-python = ">=3.12"
dependencies = [
@ -74,7 +74,7 @@ dependencies = [
"deepagents>=0.4.12",
"stripe>=15.0.0",
"azure-ai-documentintelligence>=1.0.2",
"litellm>=1.83.0",
"litellm>=1.83.4",
"langchain-litellm>=0.6.4",
]

View file

@ -70,6 +70,20 @@ def _make_test_app():
async def raise_http_500():
raise HTTPException(status_code=500, detail="secret db password leaked")
@app.get("/http-503")
async def raise_http_503():
raise HTTPException(
status_code=503,
detail="Page purchases are temporarily unavailable.",
)
@app.get("/http-502")
async def raise_http_502():
raise HTTPException(
status_code=502,
detail="Unable to create Stripe checkout session.",
)
@app.get("/surfsense-connector")
async def raise_connector():
raise ConnectorError("GitHub API returned 401")
@ -184,6 +198,18 @@ class TestHTTPExceptionHandler:
assert body["error"]["message"] == GENERIC_5XX_MESSAGE
assert body["error"]["code"] == "INTERNAL_ERROR"
def test_503_preserves_detail(self, client):
# Intentional 503s (e.g. feature flag off) must surface the developer
# message so the frontend can render actionable copy.
body = _assert_envelope(client.get("/http-503"), 503)
assert body["error"]["message"] == "Page purchases are temporarily unavailable."
assert body["error"]["message"] != GENERIC_5XX_MESSAGE
def test_502_preserves_detail(self, client):
body = _assert_envelope(client.get("/http-502"), 502)
assert body["error"]["message"] == "Unable to create Stripe checkout session."
assert body["error"]["message"] != GENERIC_5XX_MESSAGE
# ---------------------------------------------------------------------------
# SurfSenseError hierarchy

View file

@ -7947,7 +7947,7 @@ wheels = [
[[package]]
name = "surf-new-backend"
version = "0.0.16"
version = "0.0.19"
source = { editable = "." }
dependencies = [
{ name = "alembic" },
@ -8070,7 +8070,7 @@ requires-dist = [
{ name = "langgraph", specifier = ">=1.1.3" },
{ name = "langgraph-checkpoint-postgres", specifier = ">=3.0.2" },
{ name = "linkup-sdk", specifier = ">=0.2.4" },
{ name = "litellm", specifier = ">=1.83.0" },
{ name = "litellm", specifier = ">=1.83.4" },
{ name = "llama-cloud-services", specifier = ">=0.6.25" },
{ name = "markdown", specifier = ">=3.7" },
{ name = "markdownify", specifier = ">=0.14.1" },