trustgraph/tests/unit/test_retrieval/test_document_rag_service.py

"""
Unit test for DocumentRAG service parameter passing fix.
Tests that user and collection parameters from the message are correctly
passed to the DocumentRag.query() method.
"""

import pytest
from unittest.mock import MagicMock, AsyncMock, patch, ANY

from trustgraph.retrieval.document_rag.rag import Processor
from trustgraph.schema import DocumentRagQuery, DocumentRagResponse


class TestDocumentRagService:
    """Regression tests for ``Processor.on_request`` in the DocumentRAG service.

    ``DocumentRag`` is patched out in every test, so only the wiring done by
    the processor is exercised: which arguments reach ``DocumentRag.query()``
    and what is sent to the ``"response"`` producer.
    """

    @staticmethod
    def _build_processor():
        """Return the Processor under test, built with a mocked task group."""
        return Processor(
            taskgroup=MagicMock(),
            id="test-processor",
            doc_limit=10,
        )

    @staticmethod
    def _build_message(request):
        """Wrap ``request`` in a mock message carrying the id ``"test-id"``."""
        message = MagicMock()
        message.value.return_value = request
        message.properties.return_value = {"id": "test-id"}
        return message

    @staticmethod
    def _build_flow():
        """Return ``(flow, response_producer)``.

        ``flow("response")`` always yields the same ``response_producer`` so
        the test can inspect what was sent; any other service name (the
        embeddings, doc-embeddings and prompt clients) gets a fresh
        throwaway ``AsyncMock``.
        """
        response_producer = AsyncMock()

        def route(service_name):
            if service_name != "response":
                return AsyncMock()
            return response_producer

        return MagicMock(side_effect=route), response_producer

    @staticmethod
    def _sent_response(response_producer):
        """Assert exactly one send happened and return its first positional arg."""
        response_producer.send.assert_called_once()
        positional, _keywords = response_producer.send.call_args
        return positional[0]

    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_user_and_collection_parameters_passed_to_query(self, mock_document_rag_class):
        """
        The user and collection carried by the request must reach
        DocumentRag.query() unchanged.

        Regression test: these parameters used to be ignored, which produced
        wrong collection names such as 'd_trustgraph_default_384' instead of
        'd_my_user_test_coll_1_384'.
        """
        processor = self._build_processor()

        # Every DocumentRag(...) construction yields this mock instance.
        rag = AsyncMock()
        mock_document_rag_class.return_value = rag
        rag.query.return_value = "test response"

        # Deliberately avoid the defaults ("trustgraph" / "default") so that a
        # hardcoded fallback would be detected.
        request = DocumentRagQuery(
            query="test query",
            user="my_user",
            collection="test_coll_1",
            doc_limit=5
        )
        msg = self._build_message(request)
        consumer = MagicMock()
        flow, producer = self._build_flow()

        await processor.on_request(msg, consumer, flow)

        # The values must come from the message; both callbacks are always
        # supplied, so only their presence is checked.
        rag.query.assert_called_once_with(
            "test query",
            user="my_user",
            collection="test_coll_1",
            doc_limit=5,
            explain_callback=ANY,
            save_answer_callback=ANY,
        )

        # A single, error-free response carrying the mocked answer was sent.
        reply = self._sent_response(producer)
        assert isinstance(reply, DocumentRagResponse)
        assert reply.response == "test response"
        assert reply.error is None

    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_non_streaming_mode_sets_end_of_stream_true(self, mock_document_rag_class):
        """
        A non-streaming request must be answered with end_of_stream=True.

        Regression test: non-streaming responses used to leave end_of_stream
        unset, so clients hung waiting for more data.
        """
        processor = self._build_processor()

        # Every DocumentRag(...) construction yields this mock instance.
        rag = AsyncMock()
        mock_document_rag_class.return_value = rag
        rag.query.return_value = "A document about cats."

        request = DocumentRagQuery(
            query="What is a cat?",
            user="trustgraph",
            collection="default",
            doc_limit=10,
            streaming=False  # the mode under test
        )
        msg = self._build_message(request)
        consumer = MagicMock()
        flow, producer = self._build_flow()

        await processor.on_request(msg, consumer, flow)

        # Exactly one response, flagged as the end of the stream.
        reply = self._sent_response(producer)
        assert isinstance(reply, DocumentRagResponse)
        assert reply.response == "A document about cats."
        assert reply.end_of_stream is True, "Non-streaming response must have end_of_stream=True"
        assert reply.error is None
Fix/document rag (#506) * Fix missing document RAG user/collection params * Added test 2025-09-09 15:30:11 +01:00			`"""`
			`Unit test for DocumentRAG service parameter passing fix.`
			`Tests that user and collection parameters from the message are correctly`
			`passed to the DocumentRag.query() method.`
			`"""`

			`import pytest`
Adding explainability to the ReACT agent (#689) * Added tech spec * Add provenance recording to React agent loop Enables agent sessions to be traced and debugged using the same explainability infrastructure as GraphRAG. Agent traces record: - Session start with query and timestamp - Each iteration's thought, action, arguments, and observation - Final answer with derivation chain Changes: - Add session_id and collection fields to AgentRequest schema - Add agent predicates (TG_THOUGHT, TG_ACTION, etc.) to namespaces - Create agent provenance triple generators in provenance/agent.py - Register explainability producer in agent service - Emit provenance triples during agent execution - Update CLI tools to detect and render agent traces alongside GraphRAG * Updated explainability taxonomy: GraphRAG: tg:Question → tg:Exploration → tg:Focus → tg:Synthesis Agent: tg:Question → tg:Analysis(s) → tg:Conclusion All entities also have their PROV-O type (prov:Activity or prov:Entity). Updated commit message: Add provenance recording to React agent loop Enables agent sessions to be traced and debugged using the same explainability infrastructure as GraphRAG. Entity types follow human reasoning patterns: - tg:Question - the user's query (shared with GraphRAG) - tg:Analysis - each think/act/observe cycle - tg:Conclusion - the final answer Also adds explicit TG types to GraphRAG entities: - tg:Question, tg:Exploration, tg:Focus, tg:Synthesis All types retain their PROV-O base types (prov:Activity, prov:Entity). Changes: - Add session_id and collection fields to AgentRequest schema - Add explainability entity types to namespaces.py - Create agent provenance triple generators - Register explainability producer in agent service - Emit provenance triples during agent execution - Update CLI tools to detect and render both trace types * Document RAG explainability is now complete. 
Here's a summary of the changes made: Schema Changes: - trustgraph-base/trustgraph/schema/services/retrieval.py: Added explain_id and explain_graph fields to DocumentRagResponse - trustgraph-base/trustgraph/messaging/translators/retrieval.py: Updated translator to handle explainability fields Provenance Changes: - trustgraph-base/trustgraph/provenance/namespaces.py: Added TG_CHUNK_COUNT and TG_SELECTED_CHUNK predicates - trustgraph-base/trustgraph/provenance/uris.py: Added docrag_question_uri, docrag_exploration_uri, docrag_synthesis_uri generators - trustgraph-base/trustgraph/provenance/triples.py: Added docrag_question_triples, docrag_exploration_triples, docrag_synthesis_triples builders - trustgraph-base/trustgraph/provenance/__init__.py: Exported all new Document RAG functions and predicates Service Changes: - trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py: Added explainability callback support and triple emission at each phase (Question → Exploration → Synthesis) - trustgraph-flow/trustgraph/retrieval/document_rag/rag.py: Registered explainability producer and wired up the callback Documentation: - docs/tech-specs/agent-explainability.md: Added Document RAG entity types and provenance model documentation Document RAG Provenance Model: Question (urn:trustgraph:docrag:{uuid}) │ │ tg:query, prov:startedAtTime │ rdf:type = prov:Activity, tg:Question │ ↓ prov:wasGeneratedBy │ Exploration (urn:trustgraph:docrag:{uuid}/exploration) │ │ tg:chunkCount, tg:selectedChunk (multiple) │ rdf:type = prov:Entity, tg:Exploration │ ↓ prov:wasDerivedFrom │ Synthesis (urn:trustgraph:docrag:{uuid}/synthesis) │ │ tg:content = "The answer..." 
│ rdf:type = prov:Entity, tg:Synthesis * Specific subtype that makes the retrieval mechanism immediately obvious: System: GraphRAG TG Types on Question: tg:Question, tg:GraphRagQuestion URI Pattern: urn:trustgraph:question:{uuid} ──────────────────────────────────────── System: Document RAG TG Types on Question: tg:Question, tg:DocRagQuestion URI Pattern: urn:trustgraph:docrag:{uuid} ──────────────────────────────────────── System: Agent TG Types on Question: tg:Question, tg:AgentQuestion URI Pattern: urn:trustgraph:agent:{uuid} Files modified: - trustgraph-base/trustgraph/provenance/namespaces.py - Added TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION - trustgraph-base/trustgraph/provenance/triples.py - Added subtype to question_triples and docrag_question_triples - trustgraph-base/trustgraph/provenance/agent.py - Added subtype to agent_session_triples - trustgraph-base/trustgraph/provenance/__init__.py - Exported new types - docs/tech-specs/agent-explainability.md - Documented the subtypes This allows: - Query all questions: ?q rdf:type tg:Question - Query only GraphRAG: ?q rdf:type tg:GraphRagQuestion - Query only Document RAG: ?q rdf:type tg:DocRagQuestion - Query only Agent: ?q rdf:type tg:AgentQuestion * Fixed tests 2026-03-11 15:28:15 +00:00			`from unittest.mock import MagicMock, AsyncMock, patch, ANY`
Fix/document rag (#506) * Fix missing document RAG user/collection params * Added test 2025-09-09 15:30:11 +01:00
			`from trustgraph.retrieval.document_rag.rag import Processor`
			`from trustgraph.schema import DocumentRagQuery, DocumentRagResponse`


			`class TestDocumentRagService:`
			`"""Test DocumentRAG service parameter passing"""`

			`@patch('trustgraph.retrieval.document_rag.rag.DocumentRag')`
			`@pytest.mark.asyncio`
			`async def test_user_and_collection_parameters_passed_to_query(self, mock_document_rag_class):`
			`"""`
			`Test that user and collection from message are passed to DocumentRag.query().`

			`This is a regression test for the bug where user/collection parameters`
			`were ignored, causing wrong collection names like 'd_trustgraph_default_384'`
			`instead of 'd_my_user_test_coll_1_384'.`
			`"""`
			`# Setup processor`
			`processor = Processor(`
			`taskgroup=MagicMock(),`
			`id="test-processor",`
			`doc_limit=10`
			`)`

			`# Setup mock DocumentRag instance`
			`mock_rag_instance = AsyncMock()`
			`mock_document_rag_class.return_value = mock_rag_instance`
			`mock_rag_instance.query.return_value = "test response"`

			`# Setup message with custom user/collection`
			`msg = MagicMock()`
			`msg.value.return_value = DocumentRagQuery(`
			`query="test query",`
			`user="my_user", # Custom user (not default "trustgraph")`
			`collection="test_coll_1", # Custom collection (not default "default")`
			`doc_limit=5`
			`)`
			`msg.properties.return_value = {"id": "test-id"}`

			`# Setup flow mock`
			`consumer = MagicMock()`
			`flow = MagicMock()`

			`# Mock flow to return AsyncMock for clients and response producer`
			`mock_producer = AsyncMock()`
			`def flow_router(service_name):`
			`if service_name == "response":`
			`return mock_producer`
			`return AsyncMock() # embeddings, doc-embeddings, prompt clients`
			`flow.side_effect = flow_router`

			`# Execute`
			`await processor.on_request(msg, consumer, flow)`

			`# Verify: DocumentRag.query was called with correct parameters`
			`mock_rag_instance.query.assert_called_once_with(`
			`"test query",`
			`user="my_user", # Must be from message, not hardcoded default`
Adding explainability to the ReACT agent (#689) * Added tech spec * Add provenance recording to React agent loop Enables agent sessions to be traced and debugged using the same explainability infrastructure as GraphRAG. Agent traces record: - Session start with query and timestamp - Each iteration's thought, action, arguments, and observation - Final answer with derivation chain Changes: - Add session_id and collection fields to AgentRequest schema - Add agent predicates (TG_THOUGHT, TG_ACTION, etc.) to namespaces - Create agent provenance triple generators in provenance/agent.py - Register explainability producer in agent service - Emit provenance triples during agent execution - Update CLI tools to detect and render agent traces alongside GraphRAG * Updated explainability taxonomy: GraphRAG: tg:Question → tg:Exploration → tg:Focus → tg:Synthesis Agent: tg:Question → tg:Analysis(s) → tg:Conclusion All entities also have their PROV-O type (prov:Activity or prov:Entity). Updated commit message: Add provenance recording to React agent loop Enables agent sessions to be traced and debugged using the same explainability infrastructure as GraphRAG. Entity types follow human reasoning patterns: - tg:Question - the user's query (shared with GraphRAG) - tg:Analysis - each think/act/observe cycle - tg:Conclusion - the final answer Also adds explicit TG types to GraphRAG entities: - tg:Question, tg:Exploration, tg:Focus, tg:Synthesis All types retain their PROV-O base types (prov:Activity, prov:Entity). Changes: - Add session_id and collection fields to AgentRequest schema - Add explainability entity types to namespaces.py - Create agent provenance triple generators - Register explainability producer in agent service - Emit provenance triples during agent execution - Update CLI tools to detect and render both trace types * Document RAG explainability is now complete. 
Here's a summary of the changes made: Schema Changes: - trustgraph-base/trustgraph/schema/services/retrieval.py: Added explain_id and explain_graph fields to DocumentRagResponse - trustgraph-base/trustgraph/messaging/translators/retrieval.py: Updated translator to handle explainability fields Provenance Changes: - trustgraph-base/trustgraph/provenance/namespaces.py: Added TG_CHUNK_COUNT and TG_SELECTED_CHUNK predicates - trustgraph-base/trustgraph/provenance/uris.py: Added docrag_question_uri, docrag_exploration_uri, docrag_synthesis_uri generators - trustgraph-base/trustgraph/provenance/triples.py: Added docrag_question_triples, docrag_exploration_triples, docrag_synthesis_triples builders - trustgraph-base/trustgraph/provenance/__init__.py: Exported all new Document RAG functions and predicates Service Changes: - trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py: Added explainability callback support and triple emission at each phase (Question → Exploration → Synthesis) - trustgraph-flow/trustgraph/retrieval/document_rag/rag.py: Registered explainability producer and wired up the callback Documentation: - docs/tech-specs/agent-explainability.md: Added Document RAG entity types and provenance model documentation Document RAG Provenance Model: Question (urn:trustgraph:docrag:{uuid}) │ │ tg:query, prov:startedAtTime │ rdf:type = prov:Activity, tg:Question │ ↓ prov:wasGeneratedBy │ Exploration (urn:trustgraph:docrag:{uuid}/exploration) │ │ tg:chunkCount, tg:selectedChunk (multiple) │ rdf:type = prov:Entity, tg:Exploration │ ↓ prov:wasDerivedFrom │ Synthesis (urn:trustgraph:docrag:{uuid}/synthesis) │ │ tg:content = "The answer..." 
│ rdf:type = prov:Entity, tg:Synthesis * Specific subtype that makes the retrieval mechanism immediately obvious: System: GraphRAG TG Types on Question: tg:Question, tg:GraphRagQuestion URI Pattern: urn:trustgraph:question:{uuid} ──────────────────────────────────────── System: Document RAG TG Types on Question: tg:Question, tg:DocRagQuestion URI Pattern: urn:trustgraph:docrag:{uuid} ──────────────────────────────────────── System: Agent TG Types on Question: tg:Question, tg:AgentQuestion URI Pattern: urn:trustgraph:agent:{uuid} Files modified: - trustgraph-base/trustgraph/provenance/namespaces.py - Added TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION - trustgraph-base/trustgraph/provenance/triples.py - Added subtype to question_triples and docrag_question_triples - trustgraph-base/trustgraph/provenance/agent.py - Added subtype to agent_session_triples - trustgraph-base/trustgraph/provenance/__init__.py - Exported new types - docs/tech-specs/agent-explainability.md - Documented the subtypes This allows: - Query all questions: ?q rdf:type tg:Question - Query only GraphRAG: ?q rdf:type tg:GraphRagQuestion - Query only Document RAG: ?q rdf:type tg:DocRagQuestion - Query only Agent: ?q rdf:type tg:AgentQuestion * Fixed tests 2026-03-11 15:28:15 +00:00			`collection="test_coll_1", # Must be from message, not hardcoded default`
			`doc_limit=5,`
			`explain_callback=ANY, # Explainability callback is always passed`
Add unified explainability support and librarian storage for (#693) Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests. 2026-03-12 21:40:09 +00:00			`save_answer_callback=ANY, # Librarian save callback is always passed`
Fix/document rag (#506) * Fix missing document RAG user/collection params * Added test 2025-09-09 15:30:11 +01:00			`)`

			`# Verify response was sent`
			`mock_producer.send.assert_called_once()`
			`sent_response = mock_producer.send.call_args[0][0]`
			`assert isinstance(sent_response, DocumentRagResponse)`
			`assert sent_response.response == "test response"`
Fix non streaming RAG problems (#607) * Fix non-streaming failure in RAG services * Fix non-streaming failure in API * Fix agent non-streaming messaging * Agent messaging unit & contract tests 2026-01-12 18:45:52 +00:00			`assert sent_response.error is None`

			`@patch('trustgraph.retrieval.document_rag.rag.DocumentRag')`
			`@pytest.mark.asyncio`
			`async def test_non_streaming_mode_sets_end_of_stream_true(self, mock_document_rag_class):`
			`"""`
			`Test that non-streaming mode sets end_of_stream=True in response.`

			`This is a regression test for the bug where non-streaming responses`
			`didn't set end_of_stream, causing clients to hang waiting for more data.`
			`"""`
			`# Setup processor`
			`processor = Processor(`
			`taskgroup=MagicMock(),`
			`id="test-processor",`
			`doc_limit=10`
			`)`

			`# Setup mock DocumentRag instance`
			`mock_rag_instance = AsyncMock()`
			`mock_document_rag_class.return_value = mock_rag_instance`
			`mock_rag_instance.query.return_value = "A document about cats."`

			`# Setup message with non-streaming request`
			`msg = MagicMock()`
			`msg.value.return_value = DocumentRagQuery(`
			`query="What is a cat?",`
			`user="trustgraph",`
			`collection="default",`
			`doc_limit=10,`
			`streaming=False # Non-streaming mode`
			`)`
			`msg.properties.return_value = {"id": "test-id"}`

			`# Setup flow mock`
			`consumer = MagicMock()`
			`flow = MagicMock()`

			`mock_producer = AsyncMock()`
			`def flow_router(service_name):`
			`if service_name == "response":`
			`return mock_producer`
			`return AsyncMock()`
			`flow.side_effect = flow_router`

			`# Execute`
			`await processor.on_request(msg, consumer, flow)`

			`# Verify: response was sent with end_of_stream=True`
			`mock_producer.send.assert_called_once()`
			`sent_response = mock_producer.send.call_args[0][0]`
			`assert isinstance(sent_response, DocumentRagResponse)`
			`assert sent_response.response == "A document about cats."`
			`assert sent_response.end_of_stream is True, "Non-streaming response must have end_of_stream=True"`
Fix/document rag (#506) * Fix missing document RAG user/collection params * Added test 2025-09-09 15:30:11 +01:00			`assert sent_response.error is None`