refactor(chat): add streaming/context/ for mentioned-docs and deep-agents todos

Extracts two pure context helpers used during input-state assembly:

* mentioned_docs.format_mentioned_surfsense_docs_as_context: renders the user's @-mentioned SurfSense docs into the LLM context block.
* deepagents_todos.extract_todos_from_deepagents: pulls the in-progress todo list from a deep-agents state snapshot for the title generator.

Add-only; existing call sites in stream_new_chat.py remain untouched until cutover.
2026-07-16 23:01:06 +02:00 · 2026-05-25 21:48:08 +02:00 · 2026-05-25 21:48:08 +02:00 · c13beae1ce
commit c13beae1ce
parent 4910263c93
3 changed files with 100 additions and 0 deletions
--- a/surfsense_backend/app/tasks/chat/streaming/context/__init__.py
+++ b/surfsense_backend/app/tasks/chat/streaming/context/__init__.py
@ -0,0 +1,15 @@
+"""Pre-agent context shaping: mentioned-doc rendering and todos extraction."""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.context.deepagents_todos import (
+    extract_todos_from_deepagents,
+)
+from app.tasks.chat.streaming.context.mentioned_docs import (
+    format_mentioned_surfsense_docs_as_context,
+)
+
+# Public API of this package: the two helpers imported above, re-exported so
+# callers can import them from the package itself.
+__all__ = [
+    "extract_todos_from_deepagents",
+    "format_mentioned_surfsense_docs_as_context",
+]
--- a/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py
+++ b/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py
@ -0,0 +1,27 @@
+"""Extract todos from a deepagents ``TodoListMiddleware`` ``Command`` output."""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def extract_todos_from_deepagents(command_output: Any) -> dict:
+    """Normalize todos out of a deepagents ``Command`` or dict payload.
+
+    deepagents returns a ``Command`` whose ``update['todos']`` is a list of
+    ``{'content': str, 'status': str}`` dicts. The UI expects the same shape,
+    so no transformation is required — only extraction.
+    """
+    todos_data: list = []
+    if hasattr(command_output, "update"):
+        update = command_output.update
+        todos_data = update.get("todos", [])
+    elif isinstance(command_output, dict):
+        if "todos" in command_output:
+            todos_data = command_output.get("todos", [])
+        elif "update" in command_output and isinstance(
+            command_output["update"], dict
+        ):
+            todos_data = command_output["update"].get("todos", [])
+
+    return {"todos": todos_data}
--- a/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py
+++ b/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py
@ -0,0 +1,58 @@
+"""Render user-mentioned SurfSense docs as XML context for the agent."""
+
+from __future__ import annotations
+
+import json
+
+from app.db import SurfsenseDocsDocument
+from app.utils.surfsense_docs import surfsense_docs_public_url
+
+
+def format_mentioned_surfsense_docs_as_context(
+    documents: list[SurfsenseDocsDocument],
+) -> str:
+    if not documents:
+        return ""
+
+    context_parts = ["<mentioned_surfsense_docs>"]
+    context_parts.append(
+        "The user has explicitly mentioned the following SurfSense documentation pages. "
+        "These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
+        "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])."
+    )
+
+    for doc in documents:
+        public_url = surfsense_docs_public_url(doc.source)
+        metadata_json = json.dumps(
+            {"source": doc.source, "public_url": public_url}, ensure_ascii=False
+        )
+
+        context_parts.append("<document>")
+        context_parts.append("<document_metadata>")
+        context_parts.append(f"  <document_id>doc-{doc.id}</document_id>")
+        context_parts.append("  <document_type>SURFSENSE_DOCS</document_type>")
+        context_parts.append(f"  <title><![CDATA[{doc.title}]]></title>")
+        context_parts.append(f"  <url><![CDATA[{public_url}]]></url>")
+        context_parts.append(
+            f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
+        )
+        context_parts.append("</document_metadata>")
+        context_parts.append("")
+        context_parts.append("<document_content>")
+
+        if hasattr(doc, "chunks") and doc.chunks:
+            for chunk in doc.chunks:
+                context_parts.append(
+                    f"  <chunk id='doc-{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
+                )
+        else:
+            context_parts.append(
+                f"  <chunk id='doc-0'><![CDATA[{doc.content}]]></chunk>"
+            )
+
+        context_parts.append("</document_content>")
+        context_parts.append("</document>")
+        context_parts.append("")
+
+    context_parts.append("</mentioned_surfsense_docs>")
+    return "\n".join(context_parts)