refactor(chat): add streaming/context/ for mentioned-docs and deep-agents todos

Extracts two pure context helpers used during input-state assembly:

* mentioned_docs.format_mentioned_surfsense_docs_as_context: renders the
  user's @-mentioned SurfSense docs into the LLM context block.
* deepagents_todos.extract_todos_from_deepagents: pulls the todo list out of
  a deepagents Command (or plain dict) payload, unchanged, and returns it as
  {"todos": [...]} in the shape the UI expects.

Add-only; existing call sites in stream_new_chat.py remain untouched
until cutover.
This commit is contained in:
CREDO23 2026-05-25 21:48:08 +02:00
parent 4910263c93
commit c13beae1ce
3 changed files with 100 additions and 0 deletions

View file

@ -0,0 +1,15 @@
"""Pre-agent context shaping: mentioned-doc rendering and todos extraction."""
from __future__ import annotations
from app.tasks.chat.streaming.context.deepagents_todos import (
extract_todos_from_deepagents,
)
from app.tasks.chat.streaming.context.mentioned_docs import (
format_mentioned_surfsense_docs_as_context,
)
# Public API of this package: the two helpers re-exported from the
# ``deepagents_todos`` and ``mentioned_docs`` submodules imported above.
__all__ = [
"extract_todos_from_deepagents",
"format_mentioned_surfsense_docs_as_context",
]

View file

@ -0,0 +1,27 @@
"""Extract todos from a deepagents ``TodoListMiddleware`` ``Command`` output."""
from __future__ import annotations
from typing import Any
def extract_todos_from_deepagents(command_output: Any) -> dict:
    """Normalize todos out of a deepagents ``Command`` or dict payload.

    deepagents returns a ``Command`` whose ``update['todos']`` is a list of
    ``{'content': str, 'status': str}`` dicts. The UI expects the same shape,
    so no transformation is required, only extraction.

    Args:
        command_output: One of

            * a dict with a top-level ``"todos"`` key,
            * a dict with an ``"update"`` dict that holds ``"todos"``,
            * any object exposing an ``update`` attribute that is a dict
              (e.g. a ``Command``).

            Anything else is treated as "no todos".

    Returns:
        ``{"todos": <list>}``. The list is empty when no todos can be found,
        when ``update`` is not a dict, or when the stored value is ``None``.
    """
    if isinstance(command_output, dict):
        # Dicts must be handled before the attribute probe: every dict has an
        # ``update`` *method*, so ``hasattr(d, "update")`` is always true and
        # calling ``.get`` on that bound method raises AttributeError.
        if "todos" in command_output:
            update = command_output
        else:
            update = command_output.get("update")
    else:
        update = getattr(command_output, "update", None)

    # ``update`` may be None or a non-dict value; only a dict can carry the
    # "todos" key we are looking for.
    todos_data = update.get("todos") if isinstance(update, dict) else None
    return {"todos": [] if todos_data is None else todos_data}

View file

@ -0,0 +1,58 @@
"""Render user-mentioned SurfSense docs as XML context for the agent."""
from __future__ import annotations
import json
from app.db import SurfsenseDocsDocument
from app.utils.surfsense_docs import surfsense_docs_public_url
def format_mentioned_surfsense_docs_as_context(
    documents: list[SurfsenseDocsDocument],
) -> str:
    """Build the ``<mentioned_surfsense_docs>`` context block for the agent.

    Each document becomes a ``<document>`` element with a metadata section
    (id, type, title, public URL, JSON metadata) followed by its content as
    ``<chunk>`` elements wrapped in CDATA. A document with no chunks is
    emitted as a single chunk with id ``doc-0`` holding ``doc.content``.

    Args:
        documents: The SurfSense documentation pages the user mentioned.

    Returns:
        The newline-joined block, or ``""`` when ``documents`` is empty.
    """
    if not documents:
        return ""

    lines = [
        "<mentioned_surfsense_docs>",
        (
            "The user has explicitly mentioned the following SurfSense documentation pages. "
            "These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
            "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])."
        ),
    ]

    for document in documents:
        url = surfsense_docs_public_url(document.source)
        meta = json.dumps(
            {"source": document.source, "public_url": url}, ensure_ascii=False
        )

        # Metadata header, blank separator, then the opening content tag.
        lines.extend(
            (
                "<document>",
                "<document_metadata>",
                f" <document_id>doc-{document.id}</document_id>",
                " <document_type>SURFSENSE_DOCS</document_type>",
                f" <title><![CDATA[{document.title}]]></title>",
                f" <url><![CDATA[{url}]]></url>",
                f" <metadata_json><![CDATA[{meta}]]></metadata_json>",
                "</document_metadata>",
                "",
                "<document_content>",
            )
        )

        doc_chunks = getattr(document, "chunks", None)
        if doc_chunks:
            lines.extend(
                f" <chunk id='doc-{piece.id}'><![CDATA[{piece.content}]]></chunk>"
                for piece in doc_chunks
            )
        else:
            # No chunks available: emit the whole document body as one chunk.
            lines.append(
                f" <chunk id='doc-0'><![CDATA[{document.content}]]></chunk>"
            )

        lines.extend(("</document_content>", "</document>", ""))

    lines.append("</mentioned_surfsense_docs>")
    return "\n".join(lines)