mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
refactor(chat): add streaming/context/ for mentioned-docs and deep-agents todos
Extracts two pure context helpers used during input-state assembly: * mentioned_docs.format_mentioned_surfsense_docs_as_context: renders the user's @-mentioned SurfSense docs into the LLM context block. * deepagents_todos.extract_todos_from_deepagents: pulls the in-progress todo list from a deep-agents state snapshot for the title generator. Add-only; existing call sites in stream_new_chat.py remain untouched until cutover.
This commit is contained in:
parent
4910263c93
commit
c13beae1ce
3 changed files with 100 additions and 0 deletions
|
|
@ -0,0 +1,15 @@
|
|||
"""Pre-agent context shaping: mentioned-doc rendering and todos extraction."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.tasks.chat.streaming.context.deepagents_todos import (
|
||||
extract_todos_from_deepagents,
|
||||
)
|
||||
from app.tasks.chat.streaming.context.mentioned_docs import (
|
||||
format_mentioned_surfsense_docs_as_context,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"extract_todos_from_deepagents",
|
||||
"format_mentioned_surfsense_docs_as_context",
|
||||
]
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
"""Extract todos from a deepagents ``TodoListMiddleware`` ``Command`` output."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
def extract_todos_from_deepagents(command_output: Any) -> dict:
|
||||
"""Normalize todos out of a deepagents ``Command`` or dict payload.
|
||||
|
||||
deepagents returns a ``Command`` whose ``update['todos']`` is a list of
|
||||
``{'content': str, 'status': str}`` dicts. The UI expects the same shape,
|
||||
so no transformation is required — only extraction.
|
||||
"""
|
||||
todos_data: list = []
|
||||
if hasattr(command_output, "update"):
|
||||
update = command_output.update
|
||||
todos_data = update.get("todos", [])
|
||||
elif isinstance(command_output, dict):
|
||||
if "todos" in command_output:
|
||||
todos_data = command_output.get("todos", [])
|
||||
elif "update" in command_output and isinstance(
|
||||
command_output["update"], dict
|
||||
):
|
||||
todos_data = command_output["update"].get("todos", [])
|
||||
|
||||
return {"todos": todos_data}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
"""Render user-mentioned SurfSense docs as XML context for the agent."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.db import SurfsenseDocsDocument
|
||||
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||
|
||||
|
||||
def format_mentioned_surfsense_docs_as_context(
|
||||
documents: list[SurfsenseDocsDocument],
|
||||
) -> str:
|
||||
if not documents:
|
||||
return ""
|
||||
|
||||
context_parts = ["<mentioned_surfsense_docs>"]
|
||||
context_parts.append(
|
||||
"The user has explicitly mentioned the following SurfSense documentation pages. "
|
||||
"These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
|
||||
"Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])."
|
||||
)
|
||||
|
||||
for doc in documents:
|
||||
public_url = surfsense_docs_public_url(doc.source)
|
||||
metadata_json = json.dumps(
|
||||
{"source": doc.source, "public_url": public_url}, ensure_ascii=False
|
||||
)
|
||||
|
||||
context_parts.append("<document>")
|
||||
context_parts.append("<document_metadata>")
|
||||
context_parts.append(f" <document_id>doc-{doc.id}</document_id>")
|
||||
context_parts.append(" <document_type>SURFSENSE_DOCS</document_type>")
|
||||
context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
|
||||
context_parts.append(f" <url><![CDATA[{public_url}]]></url>")
|
||||
context_parts.append(
|
||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
|
||||
)
|
||||
context_parts.append("</document_metadata>")
|
||||
context_parts.append("")
|
||||
context_parts.append("<document_content>")
|
||||
|
||||
if hasattr(doc, "chunks") and doc.chunks:
|
||||
for chunk in doc.chunks:
|
||||
context_parts.append(
|
||||
f" <chunk id='doc-{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
|
||||
)
|
||||
else:
|
||||
context_parts.append(
|
||||
f" <chunk id='doc-0'><![CDATA[{doc.content}]]></chunk>"
|
||||
)
|
||||
|
||||
context_parts.append("</document_content>")
|
||||
context_parts.append("</document>")
|
||||
context_parts.append("")
|
||||
|
||||
context_parts.append("</mentioned_surfsense_docs>")
|
||||
return "\n".join(context_parts)
|
||||
Loading…
Add table
Add a link
Reference in a new issue