fix: mentioned documents xml structure

This commit is contained in:
CREDO23 2025-12-24 23:35:20 +02:00
parent 3660b91e63
commit ef9e9b65df

View file

@@ -52,38 +52,41 @@ def format_mentioned_documents_as_context(documents: list[Document]) -> str:
"""Format mentioned documents as context for the agent.""" """Format mentioned documents as context for the agent."""
if not documents: if not documents:
return "" return ""
import json
context_parts = ["<mentioned_documents>"] parts = []
context_parts.append( for doc in documents:
"The user has explicitly mentioned the following documents from their knowledge base. " metadata = doc.document_metadata or {}
"These documents are directly relevant to the query and should be prioritized as primary sources." chunks = (
[
{"chunk_id": c.id, "content": c.content}
for c in getattr(doc, "chunks", [])
]
if hasattr(doc, "chunks") and doc.chunks
else [{"chunk_id": doc.id, "content": doc.content}]
) )
for i, doc in enumerate(documents, 1): metadata_json = json.dumps(metadata, ensure_ascii=False)
# Prepare retriever-style structure parts.append("<document>")
doc_metadata = doc.document_metadata if doc.document_metadata else {} parts.append("<document_metadata>")
xml_doc = f""" parts.append(f" <document_id>{doc.id}</document_id>")
<document index='{i}'> parts.append(f" <document_type>{doc.document_type.value}</document_type>")
<document_id>{doc.id}</document_id> parts.append(f" <title><![CDATA[{doc.title}]]></title>")
<content><![CDATA[{doc.content}]]></content> parts.append(" <url><![CDATA[]]></url>")
<score>1.0</score> parts.append(f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>")
<chunks> parts.append("</document_metadata>")
<chunk> parts.append("")
<chunk_id>{doc.id}-full</chunk_id> parts.append("<document_content>")
<content><![CDATA[{doc.content}]]></content> for ch in chunks:
</chunk> ch_content = ch["content"]
</chunks> ch_id = ch["chunk_id"]
<document_info> if ch_id is None:
<id>{doc.id}</id> parts.append(f" <chunk><![CDATA[{ch_content}]]></chunk>")
<title><![CDATA[{doc.title}]]></title> else:
<document_type>{doc.document_type.value}</document_type> parts.append(f" <chunk id='{ch_id}'><![CDATA[{ch_content}]]></chunk>")
<metadata><![CDATA[{json.dumps(doc_metadata)}]]></metadata> parts.append("</document_content>")
</document_info> parts.append("</document>")
<source>{doc.document_type.value}</source> parts.append("")
</document>""" return "\n".join(parts).strip()
context_parts.append(xml_doc.strip())
context_parts.append("</mentioned_documents>")
return "\n".join(context_parts)
async def stream_new_chat( async def stream_new_chat(