search-kb: on-demand KB tool on the [n] spine; drop kb_matched_chunk_ids

The main agent's search_knowledge_base tool runs the hybrid spine, renders a <retrieved_context> of numbered [n] passages, and persists the registry. KB subagent prompts teach citing [n] from <document view="full"> reads (evidence.chunk_ids -> evidence.citations). Delete the now-unused search->read highlighting hand-off: the kb_matched_chunk_ids state field, its reducer default, the tool's _matched_chunk_ids writer, and the dead KnowledgePriorityMiddleware writes.
2026-06-26 21:39:43 +02:00 · 2026-06-25 15:26:39 +02:00 · 2026-06-25 15:26:39 +02:00 · c98bdea5cf
commit c98bdea5cf
parent 04a76b163b
16 changed files with 518 additions and 325 deletions
--- a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
+++ b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
@@ -6,9 +6,6 @@ import pytest
 from langchain_core.messages import AIMessage, HumanMessage

 from app.agents.chat.multi_agent_chat.shared.middleware import knowledge_search as ks
-from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
-    build_document_xml as _build_document_xml,
-)
 from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
    KBSearchPlan,
    KnowledgePriorityMiddleware,
@@ -59,88 +56,6 @@ class TestResolveSearchTypes:
        assert result.count("FILE") == 1


-# ── _build_document_xml ────────────────────────────────────────────────
-
-
-class TestBuildDocumentXml:
-    @pytest.fixture
-    def sample_document(self):
-        return {
-            "document_id": 42,
-            "document": {
-                "id": 42,
-                "document_type": "FILE",
-                "title": "Test Doc",
-                "metadata": {"url": "https://example.com"},
-            },
-            "chunks": [
-                {"chunk_id": 101, "content": "First chunk content"},
-                {"chunk_id": 102, "content": "Second chunk content"},
-                {"chunk_id": 103, "content": "Third chunk content"},
-            ],
-        }
-
-    def test_contains_document_metadata(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<document_id>42</document_id>" in xml
-        assert "<document_type>FILE</document_type>" in xml
-        assert "Test Doc" in xml
-
-    def test_contains_chunk_index(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<chunk_index>" in xml
-        assert "</chunk_index>" in xml
-        assert 'chunk_id="101"' in xml
-        assert 'chunk_id="102"' in xml
-        assert 'chunk_id="103"' in xml
-
-    def test_matched_chunks_flagged_in_index(self, sample_document):
-        xml = _build_document_xml(sample_document, matched_chunk_ids={101, 103})
-        lines = xml.split("\n")
-        for line in lines:
-            if 'chunk_id="101"' in line:
-                assert 'matched="true"' in line
-            if 'chunk_id="102"' in line:
-                assert 'matched="true"' not in line
-            if 'chunk_id="103"' in line:
-                assert 'matched="true"' in line
-
-    def test_chunk_content_in_document_content_section(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<document_content>" in xml
-        assert "First chunk content" in xml
-        assert "Second chunk content" in xml
-        assert "Third chunk content" in xml
-
-    def test_line_numbers_in_chunk_index_are_accurate(self, sample_document):
-        """Verify that the line ranges in chunk_index actually point to the right content."""
-        xml = _build_document_xml(sample_document, matched_chunk_ids={101})
-        xml_lines = xml.split("\n")
-
-        for line in xml_lines:
-            if 'chunk_id="101"' in line and "lines=" in line:
-                import re
-
-                m = re.search(r'lines="(\d+)-(\d+)"', line)
-                assert m, f"No lines= attribute found in: {line}"
-                start, _end = int(m.group(1)), int(m.group(2))
-                target_line = xml_lines[start - 1]
-                assert "101" in target_line
-                assert "First chunk content" in target_line
-                break
-        else:
-            pytest.fail("chunk_id=101 entry not found in chunk_index")
-
-    def test_splits_into_lines_correctly(self, sample_document):
-        """Each chunk occupies exactly one line (no embedded newlines)."""
-        xml = _build_document_xml(sample_document)
-        lines = xml.split("\n")
-        chunk_lines = [
-            line for line in lines if "<![CDATA[" in line and "<chunk" in line
-        ]
-        assert len(chunk_lines) == 3
-
-
 # ── planner parsing / date normalization ───────────────────────────────