Merge upstream/dev into feature/mcp-migration

2026-06-26 21:39:43 +02:00 · 2026-04-22 19:53:26 +02:00 · 2026-04-22 19:53:26 +02:00 · 4915675f45
commit 4915675f45
parent a4134f1f03 b067c92b4c
54 changed files with 2050 additions and 359 deletions
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@ -24,7 +24,6 @@ from deepagents.backends import StateBackend
 from deepagents.graph import BASE_AGENT_PROMPT
 from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
 from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT
-from deepagents.middleware.summarization import create_summarization_middleware
 from langchain.agents import create_agent
 from langchain.agents.middleware import TodoListMiddleware
 from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
@ -41,6 +40,9 @@ from app.agents.new_chat.middleware import (
    MemoryInjectionMiddleware,
    SurfSenseFilesystemMiddleware,
 )
+from app.agents.new_chat.middleware.safe_summarization import (
+    create_safe_summarization_middleware,
+)
 from app.agents.new_chat.system_prompt import (
    build_configurable_system_prompt,
    build_surfsense_system_prompt,
@ -347,7 +349,7 @@ async def create_surfsense_deep_agent(
            created_by_id=user_id,
            thread_id=thread_id,
        ),
-        create_summarization_middleware(llm, StateBackend),
+        create_safe_summarization_middleware(llm, StateBackend),
        PatchToolCallsMiddleware(),
        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
    ]
@ -377,7 +379,7 @@ async def create_surfsense_deep_agent(
            thread_id=thread_id,
        ),
        SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
-        create_summarization_middleware(llm, StateBackend),
+        create_safe_summarization_middleware(llm, StateBackend),
        PatchToolCallsMiddleware(),
        DedupHITLToolCallsMiddleware(agent_tools=tools),
        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
--- a/surfsense_backend/app/agents/new_chat/middleware/safe_summarization.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/safe_summarization.py
@ -0,0 +1,123 @@
+"""Safe wrapper around deepagents' SummarizationMiddleware.
+
+Upstream issue
+--------------
+`deepagents.middleware.summarization.SummarizationMiddleware._aoffload_to_backend`
+(and its sync counterpart) call
+``get_buffer_string(filtered_messages)`` before writing the evicted history
+to the backend file. In recent ``langchain-core`` versions, ``get_buffer_string``
+accesses ``m.text`` which iterates ``self.content`` — this raises
+``TypeError: 'NoneType' object is not iterable`` whenever an ``AIMessage``
+has ``content=None`` (common when a model returns *only* tool_calls, seen
+frequently with Azure OpenAI ``gpt-5.x`` responses streamed through
+LiteLLM).
+
+The exception aborts the whole agent turn, so the user just sees "Error during
+chat" with no assistant response.
+
+Fix
+---
+We subclass ``SummarizationMiddleware`` and override
+``_filter_summary_messages`` — the only call site that feeds messages into
+``get_buffer_string`` — to return *copies* of messages whose ``content`` is
+``None`` with ``content=""``. The originals flowing through the rest of the
+agent state are untouched.
+
+We also expose a drop-in ``create_safe_summarization_middleware`` factory
+that mirrors ``deepagents.middleware.summarization.create_summarization_middleware``
+but instantiates our safe subclass.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from deepagents.middleware.summarization import (
+    SummarizationMiddleware,
+    compute_summarization_defaults,
+)
+
+if TYPE_CHECKING:
+    from deepagents.backends.protocol import BACKEND_TYPES
+    from langchain_core.language_models import BaseChatModel
+    from langchain_core.messages import AnyMessage
+
+logger = logging.getLogger(__name__)
+
+
+def _sanitize_message_content(msg: AnyMessage) -> AnyMessage:
+    """Return ``msg`` with ``content`` coerced to a non-``None`` value.
+
+    ``get_buffer_string`` reads ``m.text`` which iterates ``self.content``;
+    when a provider streams back an ``AIMessage`` with only tool_calls and
+    no text, ``content`` can be ``None`` and the iteration explodes. We
+    replace ``None`` with an empty string so downstream consumers that only
+    care about text see an empty body.
+
+    The original message is left untouched — we return a copy via
+    pydantic's ``model_copy`` when available, otherwise we fall back to
+    re-setting the attribute on a shallow copy.
+    """
+
+    if getattr(msg, "content", "not-missing") is not None:
+        return msg
+
+    try:
+        return msg.model_copy(update={"content": ""})
+    except AttributeError:
+        import copy
+
+        new_msg = copy.copy(msg)
+        try:
+            new_msg.content = ""
+        except Exception:  # pragma: no cover - defensive
+            logger.debug(
+                "Could not sanitize content=None on message of type %s",
+                type(msg).__name__,
+            )
+            return msg
+        return new_msg
+
+
+class SafeSummarizationMiddleware(SummarizationMiddleware):
+    """`SummarizationMiddleware` that tolerates messages with ``content=None``.
+
+    Only ``_filter_summary_messages`` is overridden — this is the single
+    helper invoked by both the sync and async offload paths immediately
+    before ``get_buffer_string``. Normalising here means we get coverage
+    for both without having to copy the (long, rapidly-changing) offload
+    implementations from upstream.
+    """
+
+    def _filter_summary_messages(self, messages: list[AnyMessage]) -> list[AnyMessage]:
+        filtered = super()._filter_summary_messages(messages)
+        return [_sanitize_message_content(m) for m in filtered]
+
+
+def create_safe_summarization_middleware(
+    model: BaseChatModel,
+    backend: BACKEND_TYPES,
+) -> SafeSummarizationMiddleware:
+    """Drop-in replacement for ``create_summarization_middleware``.
+
+    Mirrors the defaults computed by ``deepagents`` but returns our
+    ``SafeSummarizationMiddleware`` subclass so the
+    ``content=None`` crash in ``get_buffer_string`` is avoided.
+    """
+
+    defaults = compute_summarization_defaults(model)
+    return SafeSummarizationMiddleware(
+        model=model,
+        backend=backend,
+        trigger=defaults["trigger"],
+        keep=defaults["keep"],
+        trim_tokens_to_summarize=None,
+        truncate_args_settings=defaults["truncate_args_settings"],
+    )
+
+
+__all__ = [
+    "SafeSummarizationMiddleware",
+    "create_safe_summarization_middleware",
+]
--- a/surfsense_backend/app/app.py
+++ b/surfsense_backend/app/app.py
@ -114,8 +114,19 @@ def _surfsense_error_handler(request: Request, exc: SurfSenseError) -> JSONRespo


 def _http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
-    """Wrap FastAPI/Starlette HTTPExceptions into the standard envelope."""
+    """Wrap FastAPI/Starlette HTTPExceptions into the standard envelope.
+
+    5xx sanitization policy:
+    - 500 responses are sanitized (replaced with ``GENERIC_5XX_MESSAGE``) because
+      they usually wrap raw internal errors and may leak sensitive info.
+    - Other 5xx statuses (501, 502, 503, 504, ...) are raised explicitly by
+      route code to communicate a specific, user-safe operational state
+      (e.g. 503 "Page purchases are temporarily unavailable."). Those details
+      are preserved so the frontend can render them, but the error is still
+      logged server-side.
+    """
    rid = _get_request_id(request)
+    should_sanitize = exc.status_code == 500

    # Structured dict details (e.g. {"code": "CAPTCHA_REQUIRED", "message": "..."})
    # are preserved so the frontend can parse them.
@ -130,6 +141,7 @@ def _http_exception_handler(request: Request, exc: HTTPException) -> JSONRespons
                exc.status_code,
                message,
            )
+        if should_sanitize:
            message = GENERIC_5XX_MESSAGE
            err_code = "INTERNAL_ERROR"
        body = {
@ -158,6 +170,7 @@ def _http_exception_handler(request: Request, exc: HTTPException) -> JSONRespons
            exc.status_code,
            detail,
        )
+    if should_sanitize:
        detail = GENERIC_5XX_MESSAGE
    code = _status_to_code(exc.status_code, detail)
    return _build_error_response(exc.status_code, detail, code=code, request_id=rid)
--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@ -133,6 +133,44 @@ PROVIDER_MAP = {
 }


+# Default ``api_base`` per LiteLLM provider prefix.  Used as a safety net when
+# a global LLM config does *not* specify ``api_base``: without this, LiteLLM
+# happily picks up provider-agnostic env vars (e.g. ``AZURE_API_BASE``,
+# ``OPENAI_API_BASE``) and routes, say, an ``openrouter/anthropic/claude-3-haiku``
+# request to an Azure endpoint, which then 404s with ``Resource not found``.
+# Only providers with a well-known, stable public base URL are listed here —
+# self-hosted / BYO-endpoint providers (ollama, custom, bedrock, vertex_ai,
+# huggingface, databricks, cloudflare, replicate) are intentionally omitted
+# so their existing config-driven behaviour is preserved.
+PROVIDER_DEFAULT_API_BASE = {
+    "openrouter": "https://openrouter.ai/api/v1",
+    "groq": "https://api.groq.com/openai/v1",
+    "mistral": "https://api.mistral.ai/v1",
+    "perplexity": "https://api.perplexity.ai",
+    "xai": "https://api.x.ai/v1",
+    "cerebras": "https://api.cerebras.ai/v1",
+    "deepinfra": "https://api.deepinfra.com/v1/openai",
+    "fireworks_ai": "https://api.fireworks.ai/inference/v1",
+    "together_ai": "https://api.together.xyz/v1",
+    "anyscale": "https://api.endpoints.anyscale.com/v1",
+    "cometapi": "https://api.cometapi.com/v1",
+    "sambanova": "https://api.sambanova.ai/v1",
+}
+
+
+# Canonical provider → base URL when a config uses a generic ``openai``-style
+# prefix but the ``provider`` field tells us which API it really is
+# (e.g. DeepSeek/Alibaba/Moonshot/Zhipu/MiniMax all use ``openai`` compat but
+# each has its own base URL).
+PROVIDER_KEY_DEFAULT_API_BASE = {
+    "DEEPSEEK": "https://api.deepseek.com/v1",
+    "ALIBABA_QWEN": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+    "MOONSHOT": "https://api.moonshot.ai/v1",
+    "ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
+    "MINIMAX": "https://api.minimax.io/v1",
+}
+
+
 class LLMRouterService:
    """
    Singleton service for managing LiteLLM Router.
@ -224,6 +262,16 @@ class LLMRouterService:
        # hits ContextWindowExceededError.
        full_model_list, ctx_fallbacks = cls._build_context_fallback_groups(model_list)

+        # Build a general-purpose fallback list so NotFound/timeout/rate-limit
+        # style failures on one deployment don't bubble up as hard errors —
+        # the router retries with a sibling deployment in ``auto-large``.
+        # ``auto-large`` is the large-context subset of ``auto``. The fallback
+        # is registered only when ``ctx_fallbacks`` is non-empty; otherwise
+        # ``fallbacks`` stays ``None`` and no general fallback is configured.
+        fallbacks: list[dict[str, list[str]]] | None = None
+        if ctx_fallbacks:
+            fallbacks = [{"auto": ["auto-large"]}]
+
        try:
            router_kwargs: dict[str, Any] = {
                "model_list": full_model_list,
@ -237,15 +285,18 @@ class LLMRouterService:
            }
            if ctx_fallbacks:
                router_kwargs["context_window_fallbacks"] = ctx_fallbacks
+            if fallbacks:
+                router_kwargs["fallbacks"] = fallbacks

            instance._router = Router(**router_kwargs)
            instance._initialized = True
            logger.info(
                "LLM Router initialized with %d deployments, "
-                "strategy: %s, context_window_fallbacks: %s",
+                "strategy: %s, context_window_fallbacks: %s, fallbacks: %s",
                len(model_list),
                final_settings.get("routing_strategy"),
                ctx_fallbacks or "none",
+                fallbacks or "none",
            )
        except Exception as e:
            logger.error(f"Failed to initialize LLM Router: {e}")
@ -348,10 +399,11 @@ class LLMRouterService:
                return None

            # Build model string
+            provider = config.get("provider", "").upper()
            if config.get("custom_provider"):
-                model_string = f"{config['custom_provider']}/{config['model_name']}"
+                provider_prefix = config["custom_provider"]
+                model_string = f"{provider_prefix}/{config['model_name']}"
            else:
-                provider = config.get("provider", "").upper()
                provider_prefix = PROVIDER_MAP.get(provider, provider.lower())
                model_string = f"{provider_prefix}/{config['model_name']}"

@ -361,9 +413,19 @@ class LLMRouterService:
                "api_key": config.get("api_key"),
            }

-            # Add optional api_base
-            if config.get("api_base"):
-                litellm_params["api_base"] = config["api_base"]
+            # Resolve ``api_base``. Config value wins; otherwise apply a
+            # provider-aware default so the deployment does not silently
+            # inherit unrelated env vars (e.g. ``AZURE_API_BASE``) and route
+            # requests to the wrong endpoint.  See the comment above
+            # ``PROVIDER_DEFAULT_API_BASE`` for the motivating bug (OpenRouter
+            # models 404-ing against an Azure endpoint).
+            api_base = config.get("api_base")
+            if not api_base:
+                api_base = PROVIDER_KEY_DEFAULT_API_BASE.get(provider)
+            if not api_base:
+                api_base = PROVIDER_DEFAULT_API_BASE.get(provider_prefix)
+            if api_base:
+                litellm_params["api_base"] = api_base

            # Add any additional litellm parameters
            if config.get("litellm_params"):
--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@ -1,3 +1,4 @@
+import asyncio
 import logging

 import litellm
@ -32,6 +33,39 @@ litellm.callbacks = [token_tracker]
 logger = logging.getLogger(__name__)


+# Providers that require an interactive OAuth / device-flow login before
+# issuing any completion. LiteLLM implements these with blocking sync polling
+# (requests + time.sleep), which would freeze the FastAPI event loop if
+# invoked from validation. They are never usable from a headless backend,
+# so we reject them at the edge.
+_INTERACTIVE_AUTH_PROVIDERS: frozenset[str] = frozenset(
+    {
+        "github_copilot",
+        "github-copilot",
+        "githubcopilot",
+        "copilot",
+    }
+)
+
+# Hard upper bound for a single validation call. Must exceed the ChatLiteLLM
+# request timeout (30s) by a small margin so a well-behaved provider never
+# trips the watchdog. A blocked worker thread is abandoned, not killed.
+_VALIDATION_TIMEOUT_SECONDS: float = 35.0
+
+
+def _is_interactive_auth_provider(
+    provider: str | None, custom_provider: str | None
+) -> bool:
+    """Return True if the given provider triggers interactive OAuth in LiteLLM."""
+    for raw in (custom_provider, provider):
+        if not raw:
+            continue
+        normalized = raw.strip().lower().replace(" ", "_")
+        if normalized in _INTERACTIVE_AUTH_PROVIDERS:
+            return True
+    return False
+
+
 class LLMRole:
    AGENT = "agent"  # For agent/chat operations
    DOCUMENT_SUMMARY = "document_summary"  # For document summarization
@ -93,6 +127,25 @@ async def validate_llm_config(
        - is_valid: True if config works, False otherwise
        - error_message: Empty string if valid, error description if invalid
    """
+    # Reject providers that require interactive OAuth/device-flow auth.
+    # LiteLLM's github_copilot provider (and similar) uses a blocking sync
+    # Authenticator that polls GitHub for up to several minutes and prints a
+    # device code to stdout. Running it on the FastAPI event loop will freeze
+    # the entire backend, so we refuse them up front.
+    if _is_interactive_auth_provider(provider, custom_provider):
+        msg = (
+            "Provider requires interactive OAuth/device-flow authentication "
+            "(e.g. github_copilot) and cannot be used in a hosted backend. "
+            "Please choose a provider that authenticates via API key."
+        )
+        logger.warning(
+            "Rejected LLM config validation for interactive-auth provider "
+            "(provider=%r, custom_provider=%r)",
+            provider,
+            custom_provider,
+        )
+        return False, msg
+
    try:
        # Build the model string for litellm
        if custom_provider:
@ -153,9 +206,30 @@ async def validate_llm_config(

        llm = SanitizedChatLiteLLM(**litellm_kwargs)

-        # Make a simple test call
+        # Run the test call in a worker thread with a hard timeout. Some
+        # LiteLLM providers have synchronous blocking code paths (e.g. OAuth
+        # authenticators that call time.sleep and requests.post) that would
+        # otherwise freeze the asyncio event loop. Offloading to a thread and
+        # bounding the wait keeps the server responsive even if a provider
+        # misbehaves.
        test_message = HumanMessage(content="Hello")
-        response = await llm.ainvoke([test_message])
+        try:
+            response = await asyncio.wait_for(
+                asyncio.to_thread(llm.invoke, [test_message]),
+                timeout=_VALIDATION_TIMEOUT_SECONDS,
+            )
+        except TimeoutError:
+            logger.warning(
+                "LLM config validation timed out after %ss for model: %s",
+                _VALIDATION_TIMEOUT_SECONDS,
+                model_string,
+            )
+            return (
+                False,
+                f"Validation timed out after {int(_VALIDATION_TIMEOUT_SECONDS)}s. "
+                "The provider is unreachable or requires interactive "
+                "authentication that is not supported by the backend.",
+            )

        # If we got here without exception, the config is valid
        if response and response.content:
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "surf-new-backend"
-version = "0.0.16"
+version = "0.0.19"
 description = "SurfSense Backend"
 requires-python = ">=3.12"
 dependencies = [
@ -74,7 +74,7 @@ dependencies = [
    "deepagents>=0.4.12",
    "stripe>=15.0.0",
    "azure-ai-documentintelligence>=1.0.2",
-    "litellm>=1.83.0",
+    "litellm>=1.83.4",
    "langchain-litellm>=0.6.4",
 ]

--- a/surfsense_backend/tests/unit/test_error_contract.py
+++ b/surfsense_backend/tests/unit/test_error_contract.py
@ -70,6 +70,20 @@ def _make_test_app():
    async def raise_http_500():
        raise HTTPException(status_code=500, detail="secret db password leaked")

+    @app.get("/http-503")
+    async def raise_http_503():
+        raise HTTPException(
+            status_code=503,
+            detail="Page purchases are temporarily unavailable.",
+        )
+
+    @app.get("/http-502")
+    async def raise_http_502():
+        raise HTTPException(
+            status_code=502,
+            detail="Unable to create Stripe checkout session.",
+        )
+
    @app.get("/surfsense-connector")
    async def raise_connector():
        raise ConnectorError("GitHub API returned 401")
@ -184,6 +198,18 @@ class TestHTTPExceptionHandler:
        assert body["error"]["message"] == GENERIC_5XX_MESSAGE
        assert body["error"]["code"] == "INTERNAL_ERROR"

+    def test_503_preserves_detail(self, client):
+        # Intentional 503s (e.g. feature flag off) must surface the developer
+        # message so the frontend can render actionable copy.
+        body = _assert_envelope(client.get("/http-503"), 503)
+        assert body["error"]["message"] == "Page purchases are temporarily unavailable."
+        assert body["error"]["message"] != GENERIC_5XX_MESSAGE
+
+    def test_502_preserves_detail(self, client):
+        body = _assert_envelope(client.get("/http-502"), 502)
+        assert body["error"]["message"] == "Unable to create Stripe checkout session."
+        assert body["error"]["message"] != GENERIC_5XX_MESSAGE
+

 # ---------------------------------------------------------------------------
 # SurfSenseError hierarchy
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@ -7947,7 +7947,7 @@ wheels = [

 [[package]]
 name = "surf-new-backend"
-version = "0.0.16"
+version = "0.0.19"
 source = { editable = "." }
 dependencies = [
    { name = "alembic" },
@ -8070,7 +8070,7 @@ requires-dist = [
    { name = "langgraph", specifier = ">=1.1.3" },
    { name = "langgraph-checkpoint-postgres", specifier = ">=3.0.2" },
    { name = "linkup-sdk", specifier = ">=0.2.4" },
-    { name = "litellm", specifier = ">=1.83.0" },
+    { name = "litellm", specifier = ">=1.83.4" },
    { name = "llama-cloud-services", specifier = ">=0.6.25" },
    { name = "markdown", specifier = ">=3.7" },
    { name = "markdownify", specifier = ">=0.14.1" },