trustgraph/tests/unit/test_retrieval/test_document_rag.py

633 lines
23 KiB
Python
Raw Normal View History

"""
Tests for DocumentRAG retrieval implementation
"""
import pytest
from unittest.mock import MagicMock, AsyncMock
from trustgraph.retrieval.document_rag.document_rag import DocumentRag, Query
Expose LLM token usage across all service layers (#782) Expose LLM token usage (in_token, out_token, model) across all service layers Propagate token counts from LLM services through the prompt, text-completion, graph-RAG, document-RAG, and agent orchestrator pipelines to the API gateway and Python SDK. All fields are Optional — None means "not available", distinguishing from a real zero count. Key changes: - Schema: Add in_token/out_token/model to TextCompletionResponse, PromptResponse, GraphRagResponse, DocumentRagResponse, AgentResponse - TextCompletionClient: New TextCompletionResult return type. Split into text_completion() (non-streaming) and text_completion_stream() (streaming with per-chunk handler callback) - PromptClient: New PromptResult with response_type (text/json/jsonl), typed fields (text/object/objects), and token usage. All callers updated. - RAG services: Accumulate token usage across all prompt calls (extract-concepts, edge-scoring, edge-reasoning, synthesis). Non-streaming path sends single combined response instead of chunk + end_of_session. - Agent orchestrator: UsageTracker accumulates tokens across meta-router, pattern prompt calls, and react reasoning. Attached to end_of_dialog. - Translators: Encode token fields when not None (is not None, not truthy) - Python SDK: RAG and text-completion methods return TextCompletionResult (non-streaming) or RAGChunk/AgentAnswer with token fields (streaming) - CLI: --show-usage flag on tg-invoke-llm, tg-invoke-prompt, tg-invoke-graph-rag, tg-invoke-document-rag, tg-invoke-agent
2026-04-13 14:38:34 +01:00
from trustgraph.base import PromptResult
2026-03-07 23:38:09 +00:00
# Canned chunk bodies keyed by chunk id. The fetch_chunk stubs in this
# module resolve ids against this table.
CHUNK_CONTENT = {
    # Generic documents
    "doc/c1": "Document 1 content",
    "doc/c2": "Document 2 content",
    "doc/c3": "Relevant document content",
    "doc/c4": "Another document",
    "doc/c5": "Default doc",
    "doc/c6": "Verbose test doc",
    "doc/c7": "Verbose doc content",
    # Machine-learning themed documents
    "doc/ml1": "Machine learning is a subset of artificial intelligence...",
    "doc/ml2": "ML algorithms learn patterns from data to make predictions...",
    "doc/ml3": "Common ML techniques include supervised and unsupervised learning...",
}
@pytest.fixture
def mock_fetch_chunk():
    """Provide an async stand-in for the fetch_chunk callback.

    The returned coroutine function takes ``(chunk_id, user)``, ignores
    ``user`` and resolves the id against ``CHUNK_CONTENT``. Ids missing
    from that table produce a generated ``"Content for <id>"`` string.
    """

    async def _fake_fetch(chunk_id, user):
        try:
            return CHUNK_CONTENT[chunk_id]
        except KeyError:
            return f"Content for {chunk_id}"

    return _fake_fetch
class TestDocumentRag:
    """Constructor tests for the DocumentRag class"""

    def test_document_rag_initialization_with_defaults(self, mock_fetch_chunk):
        """DocumentRag keeps its collaborators and leaves verbose off by default"""
        prompt, embeddings, doc_embeddings = MagicMock(), MagicMock(), MagicMock()

        # No verbose argument is passed here on purpose.
        rag = DocumentRag(
            prompt_client=prompt,
            embeddings_client=embeddings,
            doc_embeddings_client=doc_embeddings,
            fetch_chunk=mock_fetch_chunk,
        )

        # Every collaborator is exposed under the keyword it was passed as.
        assert rag.prompt_client == prompt
        assert rag.embeddings_client == embeddings
        assert rag.doc_embeddings_client == doc_embeddings
        assert rag.fetch_chunk == mock_fetch_chunk
        # Expected default when verbose is omitted.
        assert rag.verbose is False

    def test_document_rag_initialization_with_verbose(self, mock_fetch_chunk):
        """DocumentRag records verbose=True when it is requested explicitly"""
        prompt, embeddings, doc_embeddings = MagicMock(), MagicMock(), MagicMock()

        rag = DocumentRag(
            prompt_client=prompt,
            embeddings_client=embeddings,
            doc_embeddings_client=doc_embeddings,
            fetch_chunk=mock_fetch_chunk,
            verbose=True,
        )

        assert rag.prompt_client == prompt
        assert rag.embeddings_client == embeddings
        assert rag.doc_embeddings_client == doc_embeddings
        assert rag.fetch_chunk == mock_fetch_chunk
        assert rag.verbose is True
class TestQuery:
"""Test cases for Query class"""
def test_query_initialization_with_defaults(self):
    """Query stores its constructor arguments and defaults doc_limit to 20"""
    rag_stub = MagicMock()

    # doc_limit is deliberately left out so the default applies.
    q = Query(
        rag=rag_stub,
        user="test_user",
        collection="test_collection",
        verbose=False,
    )

    assert q.rag == rag_stub
    assert q.user == "test_user"
    assert q.collection == "test_collection"
    assert q.verbose is False
    # Expected default when doc_limit is omitted.
    assert q.doc_limit == 20
def test_query_initialization_with_custom_doc_limit(self):
    """Query honours an explicit doc_limit alongside its other arguments"""
    rag_stub = MagicMock()

    q = Query(
        rag=rag_stub,
        user="custom_user",
        collection="custom_collection",
        verbose=True,
        doc_limit=50,
    )

    assert q.rag == rag_stub
    assert q.user == "custom_user"
    assert q.collection == "custom_collection"
    assert q.verbose is True
    assert q.doc_limit == 50
@pytest.mark.asyncio
async def test_extract_concepts(self):
    """Query.extract_concepts turns the prompt's text reply into a concept list"""
    question = "What is machine learning?"

    # The mocked prompt reply carries one concept per line.
    prompt_stub = AsyncMock()
    prompt_stub.prompt.return_value = PromptResult(
        response_type="text",
        text="machine learning\nartificial intelligence\ndata patterns",
    )
    rag_stub = MagicMock()
    rag_stub.prompt_client = prompt_stub

    q = Query(
        rag=rag_stub,
        user="test_user",
        collection="test_collection",
        verbose=False,
    )
    concepts = await q.extract_concepts(question)

    # The "extract-concepts" prompt must be invoked once with the question.
    prompt_stub.prompt.assert_called_once_with(
        "extract-concepts",
        variables={"query": question},
    )
    assert concepts == [
        "machine learning",
        "artificial intelligence",
        "data patterns",
    ]
@pytest.mark.asyncio
async def test_extract_concepts_fallback_to_raw_query(self):
    """Query.extract_concepts returns the raw query when the prompt reply is empty"""
    question = "What is ML?"

    # An empty text reply means no concepts were extracted.
    prompt_stub = AsyncMock()
    prompt_stub.prompt.return_value = PromptResult(response_type="text", text="")
    rag_stub = MagicMock()
    rag_stub.prompt_client = prompt_stub

    q = Query(
        rag=rag_stub,
        user="test_user",
        collection="test_collection",
        verbose=False,
    )
    concepts = await q.extract_concepts(question)

    # The original question becomes the sole concept.
    assert concepts == [question]
@pytest.mark.asyncio
async def test_get_vectors_method(self):
    """Query.get_vectors forwards the concepts to embed() and returns its result"""
    concepts = ["machine learning", "data patterns"]
    # One mocked vector per concept.
    expected_vectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]

    embed_stub = AsyncMock()
    embed_stub.embed.return_value = expected_vectors
    rag_stub = MagicMock()
    rag_stub.embeddings_client = embed_stub

    q = Query(
        rag=rag_stub,
        user="test_user",
        collection="test_collection",
        verbose=False,
    )
    vectors = await q.get_vectors(concepts)

    embed_stub.embed.assert_called_once_with(concepts)
    assert vectors == expected_vectors
@pytest.mark.asyncio
async def test_get_docs_method(self):
    """Query.get_docs embeds the concepts, queries the store and fetches the chunks"""
    concepts = ["test concept"]

    # Stand-in for rag.fetch_chunk, backed by the module-level table.
    async def fetch_stub(chunk_id, user):
        return CHUNK_CONTENT.get(chunk_id, f"Content for {chunk_id}")

    # A single concept, so a single embedding vector.
    embed_stub = AsyncMock()
    embed_stub.embed.return_value = [[0.1, 0.2, 0.3]]

    # The store replies with match objects exposing chunk_id and score.
    store_stub = AsyncMock()
    store_stub.query.return_value = [
        MagicMock(chunk_id="doc/c1", score=0.95),
        MagicMock(chunk_id="doc/c2", score=0.85),
    ]

    rag_stub = MagicMock()
    rag_stub.embeddings_client = embed_stub
    rag_stub.doc_embeddings_client = store_stub
    rag_stub.fetch_chunk = fetch_stub

    q = Query(
        rag=rag_stub,
        user="test_user",
        collection="test_collection",
        verbose=False,
        doc_limit=15,
    )
    outcome = await q.get_docs(concepts)

    embed_stub.embed.assert_called_once_with(concepts)

    # The store lookup carries the vector plus the Query's own settings.
    store_stub.query.assert_called_once_with(
        vector=[0.1, 0.2, 0.3],
        limit=15,
        user="test_user",
        collection="test_collection",
    )

    # get_docs hands back a (docs, chunk_ids) pair.
    docs, chunk_ids = outcome
    for expected_text in ("Document 1 content", "Document 2 content"):
        assert expected_text in docs
    for expected_id in ("doc/c1", "doc/c2"):
        assert expected_id in chunk_ids
@pytest.mark.asyncio
async def test_document_rag_query_method(self, mock_fetch_chunk):
    """Drive DocumentRag.query end to end against mocked clients

    Checks the concept-extraction prompt, the embedding request, the
    document-embedding lookup, the arguments handed to document_prompt,
    and the text part of the returned pair.
    """
    expected_response = "This is the document RAG response"

    prompt_stub = AsyncMock()
    # Concept extraction yields a single concept ...
    prompt_stub.prompt.return_value = PromptResult(
        response_type="text", text="test concept"
    )
    # ... and the document prompt yields the final answer text.
    prompt_stub.document_prompt.return_value = PromptResult(
        response_type="text", text=expected_response
    )

    # One embedding vector for the one concept.
    embed_stub = AsyncMock()
    embed_stub.embed.return_value = [[0.1, 0.2, 0.3]]

    store_stub = AsyncMock()
    store_stub.query.return_value = [
        MagicMock(chunk_id="doc/c3", score=0.9),
        MagicMock(chunk_id="doc/c4", score=0.8),
    ]

    rag = DocumentRag(
        prompt_client=prompt_stub,
        embeddings_client=embed_stub,
        doc_embeddings_client=store_stub,
        fetch_chunk=mock_fetch_chunk,
        verbose=False,
    )

    outcome = await rag.query(
        query="test query",
        user="test_user",
        collection="test_collection",
        doc_limit=10,
    )

    # Step 1: concept extraction was requested for the query.
    prompt_stub.prompt.assert_called_once_with(
        "extract-concepts",
        variables={"query": "test query"},
    )

    # Step 2: the extracted concept was embedded.
    embed_stub.embed.assert_called_once_with(["test concept"])

    # Step 3: the store was searched with that vector and the caller's settings.
    store_stub.query.assert_called_once_with(
        vector=[0.1, 0.2, 0.3],
        limit=10,
        user="test_user",
        collection="test_collection",
    )

    # Step 4: the document prompt received the query and the fetched chunk text.
    prompt_stub.document_prompt.assert_called_once()
    prompt_kwargs = prompt_stub.document_prompt.call_args.kwargs
    assert prompt_kwargs["query"] == "test query"
    supplied_docs = prompt_kwargs["documents"]
    assert "Relevant document content" in supplied_docs
    assert "Another document" in supplied_docs

    # The result unpacks into two items; only the text is checked here.
    result_text, _usage = outcome
    assert result_text == expected_response
@pytest.mark.asyncio
async def test_document_rag_query_with_defaults(self, mock_fetch_chunk):
    """DocumentRag.query applies its default user, collection and doc_limit"""
    prompt_stub = AsyncMock()
    # Empty concept text: the fallback-to-raw-query case that
    # test_extract_concepts_fallback_to_raw_query covers directly.
    prompt_stub.prompt.return_value = PromptResult(response_type="text", text="")
    prompt_stub.document_prompt.return_value = PromptResult(
        response_type="text", text="Default response"
    )

    # NOTE(review): this mocked embedding is nested one level deeper than in
    # the other tests of this module ([[[...]]] rather than [[...]]). The
    # assertion below matches that shape - confirm whether it is intentional.
    embed_stub = AsyncMock()
    embed_stub.embed.return_value = [[[0.1, 0.2]]]

    store_stub = AsyncMock()
    store_stub.query.return_value = [MagicMock(chunk_id="doc/c5", score=0.9)]

    rag = DocumentRag(
        prompt_client=prompt_stub,
        embeddings_client=embed_stub,
        doc_embeddings_client=store_stub,
        fetch_chunk=mock_fetch_chunk,
    )

    # Only the query text is supplied; everything else is left to defaults.
    outcome = await rag.query("simple query")

    store_stub.query.assert_called_once_with(
        vector=[[0.1, 0.2]],
        limit=20,  # expected default doc_limit
        user="trustgraph",  # expected default user
        collection="default",  # expected default collection
    )

    # The result unpacks into two items; only the text is checked here.
    result_text, _usage = outcome
    assert result_text == "Default response"
@pytest.mark.asyncio
async def test_get_docs_with_verbose_output(self):
    """Test Query.get_docs method with verbose logging.

    Builds a ``Query`` with ``verbose=True`` around a mocked RAG object,
    asks for documents for a single concept, and checks that:

    * the concept list is passed to the embeddings client exactly once,
    * the document-embeddings client is queried exactly once,
    * the returned ``(docs, chunk_ids)`` pair contains the fetched
      content and the matched chunk id.
    """
    mock_rag = MagicMock()
    mock_embeddings_client = AsyncMock()
    mock_doc_embeddings_client = AsyncMock()
    mock_rag.embeddings_client = mock_embeddings_client
    mock_rag.doc_embeddings_client = mock_doc_embeddings_client

    # Mock fetch_chunk
    async def mock_fetch(chunk_id, user):
        return CHUNK_CONTENT.get(chunk_id, f"Content for {chunk_id}")
    mock_rag.fetch_chunk = mock_fetch

    # Mock responses - one vector per concept
    mock_embeddings_client.embed.return_value = [[[0.7, 0.8]]]
    mock_match = MagicMock()
    mock_match.chunk_id = "doc/c6"
    mock_match.score = 0.88
    mock_doc_embeddings_client.query.return_value = [mock_match]

    query = Query(
        rag=mock_rag,
        user="test_user",
        collection="test_collection",
        verbose=True,
        doc_limit=5
    )

    # Call get_docs with concepts
    result = await query.get_docs(["verbose test"])

    mock_embeddings_client.embed.assert_called_once_with(["verbose test"])
    mock_doc_embeddings_client.query.assert_called_once()

    # get_docs returns a (documents, chunk ids) pair.
    # NOTE(review): "Verbose test doc" is presumably the CHUNK_CONTENT
    # entry for "doc/c6" - confirm against the module-level mapping.
    docs, chunk_ids = result
    assert "Verbose test doc" in docs
    assert "doc/c6" in chunk_ids
@pytest.mark.asyncio
async def test_document_rag_query_with_verbose(self, mock_fetch_chunk):
    """Test DocumentRag.query method with verbose logging enabled.

    Wires a ``DocumentRag`` (``verbose=True``) to mocked prompt,
    embeddings and document-embeddings clients, runs a query, and checks
    that each retrieval client is called once, that the synthesis prompt
    receives the query and the fetched document content, and that the
    response text is returned as the first element of the result pair.
    """
    mock_prompt_client = AsyncMock()
    mock_embeddings_client = AsyncMock()
    mock_doc_embeddings_client = AsyncMock()

    # Mock concept extraction
    mock_prompt_client.prompt.return_value = PromptResult(response_type="text", text="verbose query test")

    # Mock responses
    mock_embeddings_client.embed.return_value = [[[0.3, 0.4]]]
    mock_match = MagicMock()
    mock_match.chunk_id = "doc/c7"
    mock_match.score = 0.92
    mock_doc_embeddings_client.query.return_value = [mock_match]
    mock_prompt_client.document_prompt.return_value = PromptResult(response_type="text", text="Verbose RAG response")

    document_rag = DocumentRag(
        prompt_client=mock_prompt_client,
        embeddings_client=mock_embeddings_client,
        doc_embeddings_client=mock_doc_embeddings_client,
        fetch_chunk=mock_fetch_chunk,
        verbose=True
    )

    result = await document_rag.query("verbose query test")

    mock_embeddings_client.embed.assert_called_once()
    mock_doc_embeddings_client.query.assert_called_once()

    # NOTE(review): "Verbose doc content" is presumably what the
    # mock_fetch_chunk fixture yields for "doc/c7" - confirm in the fixture.
    call_args = mock_prompt_client.document_prompt.call_args
    assert call_args.kwargs["query"] == "verbose query test"
    assert "Verbose doc content" in call_args.kwargs["documents"]

    # query() returns a (text, usage) pair; only the text is checked here.
    result_text, _usage = result
    assert result_text == "Verbose RAG response"
@pytest.mark.asyncio
async def test_get_docs_with_empty_results(self):
    """Test Query.get_docs method when no documents are found.

    The document-embeddings client is mocked to return no matches; the
    test checks that the concept is still embedded and queried once and
    that get_docs returns a pair of empty lists.
    """
    mock_rag = MagicMock()
    mock_embeddings_client = AsyncMock()
    mock_doc_embeddings_client = AsyncMock()
    mock_rag.embeddings_client = mock_embeddings_client
    mock_rag.doc_embeddings_client = mock_doc_embeddings_client

    async def mock_fetch(chunk_id, user):
        return f"Content for {chunk_id}"
    mock_rag.fetch_chunk = mock_fetch

    # Mock responses - empty results
    mock_embeddings_client.embed.return_value = [[[0.1, 0.2]]]
    mock_doc_embeddings_client.query.return_value = []

    query = Query(
        rag=mock_rag,
        user="test_user",
        collection="test_collection",
        verbose=False
    )

    result = await query.get_docs(["query with no results"])

    mock_embeddings_client.embed.assert_called_once_with(["query with no results"])
    mock_doc_embeddings_client.query.assert_called_once()

    # No matches means no documents and no chunk ids.
    assert result == ([], [])
@pytest.mark.asyncio
async def test_document_rag_query_with_empty_documents(self, mock_fetch_chunk):
    """Test DocumentRag.query method when no documents are retrieved.

    With the document-embeddings client returning no matches, the
    synthesis prompt must still be invoked once, with the original query
    and an empty document list, and its text must be returned.
    """
    mock_prompt_client = AsyncMock()
    mock_embeddings_client = AsyncMock()
    mock_doc_embeddings_client = AsyncMock()

    # Mock concept extraction
    mock_prompt_client.prompt.return_value = PromptResult(response_type="text", text="query with no matching docs")

    mock_embeddings_client.embed.return_value = [[[0.5, 0.6]]]
    mock_doc_embeddings_client.query.return_value = []
    mock_prompt_client.document_prompt.return_value = PromptResult(response_type="text", text="No documents found response")

    document_rag = DocumentRag(
        prompt_client=mock_prompt_client,
        embeddings_client=mock_embeddings_client,
        doc_embeddings_client=mock_doc_embeddings_client,
        fetch_chunk=mock_fetch_chunk,
        verbose=False
    )

    result = await document_rag.query("query with no matching docs")

    mock_prompt_client.document_prompt.assert_called_once_with(
        query="query with no matching docs",
        documents=[]
    )

    # query() returns a (text, usage) pair; only the text is checked here.
    result_text, _usage = result
    assert result_text == "No documents found response"
@pytest.mark.asyncio
async def test_get_vectors_with_verbose(self):
    """Test Query.get_vectors method with verbose logging.

    Checks that, with ``verbose=True``, get_vectors passes the concept
    list to the embeddings client once and returns its result unchanged.
    """
    mock_rag = MagicMock()
    mock_embeddings_client = AsyncMock()
    mock_rag.embeddings_client = mock_embeddings_client

    expected_vectors = [[0.9, 1.0, 1.1]]
    mock_embeddings_client.embed.return_value = expected_vectors

    query = Query(
        rag=mock_rag,
        user="test_user",
        collection="test_collection",
        verbose=True
    )

    result = await query.get_vectors(["verbose vector test"])

    mock_embeddings_client.embed.assert_called_once_with(["verbose vector test"])
    assert result == expected_vectors
@pytest.mark.asyncio
async def test_document_rag_integration_flow(self, mock_fetch_chunk):
    """Test complete DocumentRag integration with realistic data flow.

    Drives one query end to end against mocked clients:

    1. concept extraction returns two newline-separated concepts,
    2. both concepts are embedded in a single call,
    3. the document-embeddings client is queried once per concept, with
       the two result sets sharing one chunk id,
    4. the synthesis prompt receives the query and three distinct
       documents,
    5. the synthesis text is returned as the first element of the result.
    """
    mock_prompt_client = AsyncMock()
    mock_embeddings_client = AsyncMock()
    mock_doc_embeddings_client = AsyncMock()

    query_text = "What is machine learning?"
    final_response = "Machine learning is a field of AI that enables computers to learn and improve from experience without being explicitly programmed."

    # Mock concept extraction
    mock_prompt_client.prompt.return_value = PromptResult(response_type="text", text="machine learning\nartificial intelligence")

    # Mock embeddings - one vector per concept
    query_vectors = [[0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.7, 0.8, 0.9, 1.0]]
    mock_embeddings_client.embed.return_value = query_vectors

    # Each concept query returns some matches
    mock_matches_1 = [
        MagicMock(chunk_id="doc/ml1", score=0.9),
        MagicMock(chunk_id="doc/ml2", score=0.85),
    ]
    mock_matches_2 = [
        MagicMock(chunk_id="doc/ml2", score=0.88),  # duplicate
        MagicMock(chunk_id="doc/ml3", score=0.82),
    ]
    mock_doc_embeddings_client.query.side_effect = [mock_matches_1, mock_matches_2]
    mock_prompt_client.document_prompt.return_value = PromptResult(response_type="text", text=final_response)

    document_rag = DocumentRag(
        prompt_client=mock_prompt_client,
        embeddings_client=mock_embeddings_client,
        doc_embeddings_client=mock_doc_embeddings_client,
        fetch_chunk=mock_fetch_chunk,
        verbose=False
    )

    result = await document_rag.query(
        query=query_text,
        user="research_user",
        collection="ml_knowledge",
        doc_limit=25
    )

    # Verify concept extraction
    mock_prompt_client.prompt.assert_called_once_with(
        "extract-concepts",
        variables={"query": query_text}
    )

    # Verify embeddings called with concepts
    mock_embeddings_client.embed.assert_called_once_with(
        ["machine learning", "artificial intelligence"]
    )

    # Verify two per-concept queries were made.
    # NOTE(review): the per-concept limit is presumably 25 // 2 = 12, but
    # only the call count is asserted here - confirm against Query.get_docs.
    assert mock_doc_embeddings_client.query.call_count == 2

    # Verify prompt client was called with fetched document content
    mock_prompt_client.document_prompt.assert_called_once()
    call_args = mock_prompt_client.document_prompt.call_args
    assert call_args.kwargs["query"] == query_text

    # Verify documents were fetched and deduplicated
    docs = call_args.kwargs["documents"]
    assert "Machine learning is a subset of artificial intelligence..." in docs
    assert "ML algorithms learn patterns from data to make predictions..." in docs
    assert "Common ML techniques include supervised and unsupervised learning..." in docs
    assert len(docs) == 3  # doc/ml2 deduplicated

    # query() returns a (text, usage) pair; only the text is checked here.
    result_text, _usage = result
    assert result_text == final_response
@pytest.mark.asyncio
async def test_get_docs_deduplicates_across_concepts(self):
    """Test that get_docs deduplicates chunks across multiple concepts.

    Two concepts produce two embedding vectors; the per-concept queries
    return overlapping matches ("doc/c1" appears in both). The returned
    chunk id list must contain each chunk id only once.
    """
    mock_rag = MagicMock()
    mock_embeddings_client = AsyncMock()
    mock_doc_embeddings_client = AsyncMock()
    mock_rag.embeddings_client = mock_embeddings_client
    mock_rag.doc_embeddings_client = mock_doc_embeddings_client

    async def mock_fetch(chunk_id, user):
        return CHUNK_CONTENT.get(chunk_id, f"Content for {chunk_id}")
    mock_rag.fetch_chunk = mock_fetch

    # Two concepts → two vectors
    mock_embeddings_client.embed.return_value = [[0.1, 0.2], [0.3, 0.4]]

    # Both queries return overlapping chunks
    match_a = MagicMock(chunk_id="doc/c1", score=0.9)
    match_b = MagicMock(chunk_id="doc/c2", score=0.8)
    match_c = MagicMock(chunk_id="doc/c1", score=0.85)  # duplicate
    mock_doc_embeddings_client.query.side_effect = [
        [match_a, match_b],
        [match_c],
    ]

    query = Query(
        rag=mock_rag,
        user="test_user",
        collection="test_collection",
        verbose=False,
        doc_limit=10
    )

    # Only the chunk ids are inspected; the document list is not asserted.
    _docs, chunk_ids = await query.get_docs(["concept A", "concept B"])

    assert len(chunk_ids) == 2  # doc/c1 only counted once
    assert "doc/c1" in chunk_ids
    assert "doc/c2" in chunk_ids