2025-09-09 15:30:11 +01:00
|
|
|
"""
|
|
|
|
|
Unit test for DocumentRAG service parameter passing fix.
|
|
|
|
|
Tests that user and collection parameters from the message are correctly
|
|
|
|
|
passed to the DocumentRag.query() method.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import pytest
|
Adding explainability to the ReACT agent (#689)
* Added tech spec
* Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG. Agent traces record:
- Session start with query and timestamp
- Each iteration's thought, action, arguments, and observation
- Final answer with derivation chain
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add agent predicates (TG_THOUGHT, TG_ACTION, etc.) to namespaces
- Create agent provenance triple generators in provenance/agent.py
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render agent traces alongside GraphRAG
* Updated explainability taxonomy:
GraphRAG: tg:Question → tg:Exploration → tg:Focus → tg:Synthesis
Agent: tg:Question → tg:Analysis(s) → tg:Conclusion
All entities also have their PROV-O type (prov:Activity or prov:Entity).
Updated commit message:
Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG.
Entity types follow human reasoning patterns:
- tg:Question - the user's query (shared with GraphRAG)
- tg:Analysis - each think/act/observe cycle
- tg:Conclusion - the final answer
Also adds explicit TG types to GraphRAG entities:
- tg:Question, tg:Exploration, tg:Focus, tg:Synthesis
All types retain their PROV-O base types (prov:Activity, prov:Entity).
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add explainability entity types to namespaces.py
- Create agent provenance triple generators
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render both trace types
* Document RAG explainability is now complete. Here's a summary of the
changes made:
Schema Changes:
- trustgraph-base/trustgraph/schema/services/retrieval.py: Added
explain_id and explain_graph fields to DocumentRagResponse
- trustgraph-base/trustgraph/messaging/translators/retrieval.py:
Updated translator to handle explainability fields
Provenance Changes:
- trustgraph-base/trustgraph/provenance/namespaces.py: Added
TG_CHUNK_COUNT and TG_SELECTED_CHUNK predicates
- trustgraph-base/trustgraph/provenance/uris.py: Added
docrag_question_uri, docrag_exploration_uri, docrag_synthesis_uri
generators
- trustgraph-base/trustgraph/provenance/triples.py: Added
docrag_question_triples, docrag_exploration_triples,
docrag_synthesis_triples builders
- trustgraph-base/trustgraph/provenance/__init__.py: Exported all
new Document RAG functions and predicates
Service Changes:
- trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py:
Added explainability callback support and triple emission at each
phase (Question → Exploration → Synthesis)
- trustgraph-flow/trustgraph/retrieval/document_rag/rag.py:
Registered explainability producer and wired up the callback
Documentation:
- docs/tech-specs/agent-explainability.md: Added Document RAG entity
types and provenance model documentation
Document RAG Provenance Model:
Question (urn:trustgraph:docrag:{uuid})
│
│ tg:query, prov:startedAtTime
│ rdf:type = prov:Activity, tg:Question
│
↓ prov:wasGeneratedBy
│
Exploration (urn:trustgraph:docrag:{uuid}/exploration)
│
│ tg:chunkCount, tg:selectedChunk (multiple)
│ rdf:type = prov:Entity, tg:Exploration
│
↓ prov:wasDerivedFrom
│
Synthesis (urn:trustgraph:docrag:{uuid}/synthesis)
│
│ tg:content = "The answer..."
│ rdf:type = prov:Entity, tg:Synthesis
* Specific subtype that makes the retrieval mechanism immediately
obvious:
System: GraphRAG
TG Types on Question: tg:Question, tg:GraphRagQuestion
URI Pattern: urn:trustgraph:question:{uuid}
────────────────────────────────────────
System: Document RAG
TG Types on Question: tg:Question, tg:DocRagQuestion
URI Pattern: urn:trustgraph:docrag:{uuid}
────────────────────────────────────────
System: Agent
TG Types on Question: tg:Question, tg:AgentQuestion
URI Pattern: urn:trustgraph:agent:{uuid}
Files modified:
- trustgraph-base/trustgraph/provenance/namespaces.py - Added
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION
- trustgraph-base/trustgraph/provenance/triples.py - Added subtype to
question_triples and docrag_question_triples
- trustgraph-base/trustgraph/provenance/agent.py - Added subtype to
agent_session_triples
- trustgraph-base/trustgraph/provenance/__init__.py - Exported new types
- docs/tech-specs/agent-explainability.md - Documented the subtypes
This allows:
- Query all questions: ?q rdf:type tg:Question
- Query only GraphRAG: ?q rdf:type tg:GraphRagQuestion
- Query only Document RAG: ?q rdf:type tg:DocRagQuestion
- Query only Agent: ?q rdf:type tg:AgentQuestion
* Fixed tests
2026-03-11 15:28:15 +00:00
|
|
|
from unittest.mock import MagicMock, AsyncMock, patch, ANY
|
2025-09-09 15:30:11 +01:00
|
|
|
|
|
|
|
|
from trustgraph.retrieval.document_rag.rag import Processor
|
|
|
|
|
from trustgraph.schema import DocumentRagQuery, DocumentRagResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDocumentRagService:
    """Checks how the DocumentRAG Processor forwards requests and replies."""

    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_user_and_collection_parameters_passed_to_query(self, mock_document_rag_class):
        """
        The user and collection carried by the request must reach
        DocumentRag.query().

        Regression guard: these values used to be ignored, which produced
        collection names such as 'd_trustgraph_default_384' where
        'd_my_user_test_coll_1_384' was expected.
        """
        # Processor under test; created before the patched class is
        # configured, exactly as the request handler will later use it.
        processor = Processor(
            taskgroup=MagicMock(),
            id="test-processor",
            doc_limit=10,
        )

        # Stand-in for the DocumentRag instance the processor creates
        rag_stub = AsyncMock()
        mock_document_rag_class.return_value = rag_stub
        rag_stub.query.return_value = "test response"

        # Request carrying a non-default user and collection
        # (the defaults would be "trustgraph" and "default")
        request = DocumentRagQuery(
            query="test query",
            user="my_user",
            collection="test_coll_1",
            doc_limit=5,
        )
        request_msg = MagicMock()
        request_msg.value.return_value = request
        request_msg.properties.return_value = {"id": "test-id"}

        consumer = MagicMock()

        # The flow hands back the response producer for "response" and a
        # fresh AsyncMock for anything else (embeddings, doc-embeddings,
        # prompt clients).
        response_producer = AsyncMock()
        flow = MagicMock(
            side_effect=lambda service_name: (
                response_producer
                if service_name == "response"
                else AsyncMock()
            )
        )

        # Run the request handler
        await processor.on_request(request_msg, consumer, flow)

        # The query must carry the message's user/collection, not
        # hardcoded defaults; an explainability callback is always passed.
        rag_stub.query.assert_called_once_with(
            "test query",
            user="my_user",
            collection="test_coll_1",
            doc_limit=5,
            explain_callback=ANY,
        )

        # Exactly one response goes out, wrapping the query result
        response_producer.send.assert_called_once()
        reply = response_producer.send.call_args.args[0]
        assert isinstance(reply, DocumentRagResponse)
        assert reply.response == "test response"
        assert reply.error is None

    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_non_streaming_mode_sets_end_of_stream_true(self, mock_document_rag_class):
        """
        A non-streaming request must produce a response with
        end_of_stream=True.

        Regression guard: non-streaming responses used to leave
        end_of_stream unset, so clients hung waiting for more data.
        """
        # Processor under test
        processor = Processor(
            taskgroup=MagicMock(),
            id="test-processor",
            doc_limit=10,
        )

        # Stand-in for the DocumentRag instance the processor creates
        rag_stub = AsyncMock()
        mock_document_rag_class.return_value = rag_stub
        rag_stub.query.return_value = "A document about cats."

        # Request that explicitly asks for non-streaming mode
        request = DocumentRagQuery(
            query="What is a cat?",
            user="trustgraph",
            collection="default",
            doc_limit=10,
            streaming=False,
        )
        request_msg = MagicMock()
        request_msg.value.return_value = request
        request_msg.properties.return_value = {"id": "test-id"}

        consumer = MagicMock()

        # "response" resolves to the producer we inspect; every other
        # service name gets a throwaway AsyncMock.
        response_producer = AsyncMock()
        flow = MagicMock(
            side_effect=lambda service_name: (
                response_producer
                if service_name == "response"
                else AsyncMock()
            )
        )

        # Run the request handler
        await processor.on_request(request_msg, consumer, flow)

        # A single response is sent and it is flagged as the final one
        response_producer.send.assert_called_once()
        reply = response_producer.send.call_args.args[0]
        assert isinstance(reply, DocumentRagResponse)
        assert reply.response == "A document about cats."
        assert reply.end_of_stream is True, "Non-streaming response must have end_of_stream=True"
        assert reply.error is None
|