2025-09-09 15:30:11 +01:00
|
|
|
"""
|
|
|
|
|
Unit test for DocumentRAG service parameter passing fix.
|
|
|
|
|
Tests that user and collection parameters from the message are correctly
|
|
|
|
|
passed to the DocumentRag.query() method.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import pytest
|
Adding explainability to the ReACT agent (#689)
* Added tech spec
* Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG. Agent traces record:
- Session start with query and timestamp
- Each iteration's thought, action, arguments, and observation
- Final answer with derivation chain
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add agent predicates (TG_THOUGHT, TG_ACTION, etc.) to namespaces
- Create agent provenance triple generators in provenance/agent.py
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render agent traces alongside GraphRAG
* Updated explainability taxonomy:
GraphRAG: tg:Question → tg:Exploration → tg:Focus → tg:Synthesis
Agent: tg:Question → tg:Analysis(s) → tg:Conclusion
All entities also have their PROV-O type (prov:Activity or prov:Entity).
Updated commit message:
Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG.
Entity types follow human reasoning patterns:
- tg:Question - the user's query (shared with GraphRAG)
- tg:Analysis - each think/act/observe cycle
- tg:Conclusion - the final answer
Also adds explicit TG types to GraphRAG entities:
- tg:Question, tg:Exploration, tg:Focus, tg:Synthesis
All types retain their PROV-O base types (prov:Activity, prov:Entity).
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add explainability entity types to namespaces.py
- Create agent provenance triple generators
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render both trace types
* Document RAG explainability is now complete. Here's a summary of the
changes made:
Schema Changes:
- trustgraph-base/trustgraph/schema/services/retrieval.py: Added
explain_id and explain_graph fields to DocumentRagResponse
- trustgraph-base/trustgraph/messaging/translators/retrieval.py:
Updated translator to handle explainability fields
Provenance Changes:
- trustgraph-base/trustgraph/provenance/namespaces.py: Added
TG_CHUNK_COUNT and TG_SELECTED_CHUNK predicates
- trustgraph-base/trustgraph/provenance/uris.py: Added
docrag_question_uri, docrag_exploration_uri, docrag_synthesis_uri
generators
- trustgraph-base/trustgraph/provenance/triples.py: Added
docrag_question_triples, docrag_exploration_triples,
docrag_synthesis_triples builders
- trustgraph-base/trustgraph/provenance/__init__.py: Exported all
new Document RAG functions and predicates
Service Changes:
- trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py:
Added explainability callback support and triple emission at each
phase (Question → Exploration → Synthesis)
- trustgraph-flow/trustgraph/retrieval/document_rag/rag.py:
Registered explainability producer and wired up the callback
Documentation:
- docs/tech-specs/agent-explainability.md: Added Document RAG entity
types and provenance model documentation
Document RAG Provenance Model:
Question (urn:trustgraph:docrag:{uuid})
│
│ tg:query, prov:startedAtTime
│ rdf:type = prov:Activity, tg:Question
│
↓ prov:wasGeneratedBy
│
Exploration (urn:trustgraph:docrag:{uuid}/exploration)
│
│ tg:chunkCount, tg:selectedChunk (multiple)
│ rdf:type = prov:Entity, tg:Exploration
│
↓ prov:wasDerivedFrom
│
Synthesis (urn:trustgraph:docrag:{uuid}/synthesis)
│
│ tg:content = "The answer..."
│ rdf:type = prov:Entity, tg:Synthesis
* Specific subtype that makes the retrieval mechanism immediately
obvious:
System: GraphRAG
TG Types on Question: tg:Question, tg:GraphRagQuestion
URI Pattern: urn:trustgraph:question:{uuid}
────────────────────────────────────────
System: Document RAG
TG Types on Question: tg:Question, tg:DocRagQuestion
URI Pattern: urn:trustgraph:docrag:{uuid}
────────────────────────────────────────
System: Agent
TG Types on Question: tg:Question, tg:AgentQuestion
URI Pattern: urn:trustgraph:agent:{uuid}
Files modified:
- trustgraph-base/trustgraph/provenance/namespaces.py - Added
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION
- trustgraph-base/trustgraph/provenance/triples.py - Added subtype to
question_triples and docrag_question_triples
- trustgraph-base/trustgraph/provenance/agent.py - Added subtype to
agent_session_triples
- trustgraph-base/trustgraph/provenance/__init__.py - Exported new types
- docs/tech-specs/agent-explainability.md - Documented the subtypes
This allows:
- Query all questions: ?q rdf:type tg:Question
- Query only GraphRAG: ?q rdf:type tg:GraphRagQuestion
- Query only Document RAG: ?q rdf:type tg:DocRagQuestion
- Query only Agent: ?q rdf:type tg:AgentQuestion
* Fixed tests
2026-03-11 15:28:15 +00:00
|
|
|
from unittest.mock import MagicMock, AsyncMock, patch, ANY
|
2025-09-09 15:30:11 +01:00
|
|
|
|
|
|
|
|
from trustgraph.retrieval.document_rag.rag import Processor
|
|
|
|
|
from trustgraph.schema import DocumentRagQuery, DocumentRagResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDocumentRagService:
    """Test DocumentRAG service parameter passing"""

    @staticmethod
    def _make_processor():
        """Build the Processor under test with a mocked task group."""
        return Processor(
            taskgroup=MagicMock(),
            id="test-processor",
            doc_limit=10,
        )

    @staticmethod
    def _make_message(**query_fields):
        """Build a mock message whose value() is a DocumentRagQuery.

        Args:
            **query_fields: Keyword arguments forwarded unchanged to the
                DocumentRagQuery constructor.

        Returns:
            A MagicMock whose value() returns the query and whose
            properties() returns {"id": "test-id"}.
        """
        msg = MagicMock()
        msg.value.return_value = DocumentRagQuery(**query_fields)
        msg.properties.return_value = {"id": "test-id"}
        return msg

    @staticmethod
    def _make_flow():
        """Build a mock flow and the producer it returns for "response".

        Returns:
            A (flow, mock_producer) tuple. Calling flow("response") yields
            mock_producer; any other service name yields a fresh AsyncMock.
        """
        mock_producer = AsyncMock()

        def flow_router(service_name):
            if service_name == "response":
                return mock_producer
            return AsyncMock()  # embeddings, doc-embeddings, prompt clients

        flow = MagicMock()
        flow.side_effect = flow_router
        return flow, mock_producer

    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_user_and_collection_parameters_passed_to_query(self, mock_document_rag_class):
        """
        Test that user and collection from message are passed to DocumentRag.query().

        This is a regression test for the bug where user/collection parameters
        were ignored, causing wrong collection names like 'd_trustgraph_default_384'
        instead of 'd_my_user_test_coll_1_384'.
        """
        # Setup processor
        processor = self._make_processor()

        # Setup mock DocumentRag instance
        mock_rag_instance = AsyncMock()
        mock_document_rag_class.return_value = mock_rag_instance
        mock_rag_instance.query.return_value = "test response"

        # Setup message with custom user/collection
        msg = self._make_message(
            query="test query",
            user="my_user",  # Custom user (not default "trustgraph")
            collection="test_coll_1",  # Custom collection (not default "default")
            doc_limit=5,
        )

        # Setup flow mock: AsyncMock for clients and the response producer
        consumer = MagicMock()
        flow, mock_producer = self._make_flow()

        # Execute
        await processor.on_request(msg, consumer, flow)

        # Verify: DocumentRag.query was called with correct parameters
        mock_rag_instance.query.assert_called_once_with(
            "test query",
            user="my_user",  # Must be from message, not hardcoded default
            collection="test_coll_1",  # Must be from message, not hardcoded default
            doc_limit=5,
            explain_callback=ANY,  # Explainability callback is always passed
            save_answer_callback=ANY,  # Librarian save callback is always passed
        )

        # Verify response was sent
        mock_producer.send.assert_called_once()
        sent_response = mock_producer.send.call_args[0][0]
        assert isinstance(sent_response, DocumentRagResponse)
        assert sent_response.response == "test response"
        assert sent_response.error is None

    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_non_streaming_mode_sets_end_of_stream_true(self, mock_document_rag_class):
        """
        Test that non-streaming mode sets end_of_stream=True in response.

        This is a regression test for the bug where non-streaming responses
        didn't set end_of_stream, causing clients to hang waiting for more data.
        """
        # Setup processor
        processor = self._make_processor()

        # Setup mock DocumentRag instance
        mock_rag_instance = AsyncMock()
        mock_document_rag_class.return_value = mock_rag_instance
        mock_rag_instance.query.return_value = "A document about cats."

        # Setup message with non-streaming request
        msg = self._make_message(
            query="What is a cat?",
            user="trustgraph",
            collection="default",
            doc_limit=10,
            streaming=False,  # Non-streaming mode
        )

        # Setup flow mock
        consumer = MagicMock()
        flow, mock_producer = self._make_flow()

        # Execute
        await processor.on_request(msg, consumer, flow)

        # Verify: response was sent with end_of_stream=True
        mock_producer.send.assert_called_once()
        sent_response = mock_producer.send.call_args[0][0]
        assert isinstance(sent_response, DocumentRagResponse)
        assert sent_response.response == "A document about cats."
        assert sent_response.end_of_stream is True, "Non-streaming response must have end_of_stream=True"
        assert sent_response.error is None
|