diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 940538bff..23c3daee2 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -52,38 +52,41 @@ def format_mentioned_documents_as_context(documents: list[Document]) -> str: """Format mentioned documents as context for the agent.""" if not documents: return "" + import json - context_parts = [""] - context_parts.append( - "The user has explicitly mentioned the following documents from their knowledge base. " - "These documents are directly relevant to the query and should be prioritized as primary sources." - ) - for i, doc in enumerate(documents, 1): - # Prepare retriever-style structure - doc_metadata = doc.document_metadata if doc.document_metadata else {} - xml_doc = f""" - - {doc.id} - - 1.0 - - - {doc.id}-full - - - - - {doc.id} - <![CDATA[{doc.title}]]> - {doc.document_type.value} - - - {doc.document_type.value} - """ - context_parts.append(xml_doc.strip()) - context_parts.append("") - - return "\n".join(context_parts) + parts = [] + for doc in documents: + metadata = doc.document_metadata or {} + chunks = ( + [ + {"chunk_id": c.id, "content": c.content} + for c in getattr(doc, "chunks", []) + ] + if hasattr(doc, "chunks") and doc.chunks + else [{"chunk_id": doc.id, "content": doc.content}] + ) + metadata_json = json.dumps(metadata, ensure_ascii=False) + parts.append("") + parts.append("") + parts.append(f" {doc.id}") + parts.append(f" {doc.document_type.value}") + parts.append(f" <![CDATA[{doc.title}]]>") + parts.append(" ") + parts.append(f" ") + parts.append("") + parts.append("") + parts.append("") + for ch in chunks: + ch_content = ch["content"] + ch_id = ch["chunk_id"] + if ch_id is None: + parts.append(f" ") + else: + parts.append(f" ") + parts.append("") + parts.append("") + parts.append("") + return "\n".join(parts).strip() async def stream_new_chat(