mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 08:56:21 +02:00
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding (#697)
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding, consistent PROV-O

GraphRAG:
- Split retrieval into 4 prompt stages: extract-concepts, kg-edge-scoring, kg-edge-reasoning, kg-synthesis (was single-stage)
- Add concept extraction (grounding) for per-concept embedding
- Filter main query to default graph, ignoring provenance/explainability edges
- Add source document edges to knowledge graph

DocumentRAG:
- Add grounding step with concept extraction, matching GraphRAG's pattern: Question → Grounding → Exploration → Synthesis
- Per-concept embedding and chunk retrieval with deduplication

Cross-pipeline:
- Make PROV-O derivation links consistent: wasGeneratedBy for first entity from Activity, wasDerivedFrom for entity-to-entity chains
- Update CLIs (tg-invoke-agent, tg-invoke-graph-rag, tg-invoke-document-rag) for new explainability structure
- Fix all affected unit and integration tests
This commit is contained in:
parent
29b4300808
commit
a115ec06ab
25 changed files with 1537 additions and 1008 deletions
|
|
@ -86,13 +86,18 @@ class TestGraphRagIntegration:
|
|||
"""Mock prompt client that generates realistic responses for the multi-step process"""
|
||||
client = AsyncMock()
|
||||
|
||||
# Mock responses for the two-step process:
|
||||
# 1. kg-edge-selection returns JSONL with edge IDs
|
||||
# 2. kg-synthesis returns the final answer
|
||||
# Mock responses for the multi-step process:
|
||||
# 1. extract-concepts extracts key concepts from the query
|
||||
# 2. kg-edge-scoring scores edges for relevance
|
||||
# 3. kg-edge-reasoning provides reasoning for selected edges
|
||||
# 4. kg-synthesis returns the final answer
|
||||
async def mock_prompt(prompt_name, variables=None, streaming=False, chunk_callback=None):
|
||||
if prompt_name == "kg-edge-selection":
|
||||
# Return empty selection (no edges selected) - valid JSONL
|
||||
return ""
|
||||
if prompt_name == "extract-concepts":
|
||||
return "" # Falls back to raw query
|
||||
elif prompt_name == "kg-edge-scoring":
|
||||
return "" # No edges scored
|
||||
elif prompt_name == "kg-edge-reasoning":
|
||||
return "" # No reasoning
|
||||
elif prompt_name == "kg-synthesis":
|
||||
return (
|
||||
"Machine learning is a subset of artificial intelligence that enables computers "
|
||||
|
|
@ -160,16 +165,16 @@ class TestGraphRagIntegration:
|
|||
# 3. Should query triples to build knowledge subgraph
|
||||
assert mock_triples_client.query_stream.call_count > 0
|
||||
|
||||
# 4. Should call prompt twice (edge selection + synthesis)
|
||||
assert mock_prompt_client.prompt.call_count == 2
|
||||
# 4. Should call prompt four times (extract-concepts + edge-scoring + edge-reasoning + synthesis)
|
||||
assert mock_prompt_client.prompt.call_count == 4
|
||||
|
||||
# Verify final response
|
||||
assert response is not None
|
||||
assert isinstance(response, str)
|
||||
assert "machine learning" in response.lower()
|
||||
|
||||
# Verify provenance was emitted in real-time (4 events: question, exploration, focus, synthesis)
|
||||
assert len(provenance_events) == 4
|
||||
# Verify provenance was emitted in real-time (5 events: question, grounding, exploration, focus, synthesis)
|
||||
assert len(provenance_events) == 5
|
||||
for triples, prov_id in provenance_events:
|
||||
assert isinstance(triples, list)
|
||||
assert prov_id.startswith("urn:trustgraph:")
|
||||
|
|
@ -243,10 +248,10 @@ class TestGraphRagIntegration:
|
|||
)
|
||||
|
||||
# Assert
|
||||
# Should still call prompt client (twice: edge selection + synthesis)
|
||||
# Should still call prompt client
|
||||
assert response is not None
|
||||
# Provenance should still be emitted (4 events)
|
||||
assert len(provenance_events) == 4
|
||||
# Provenance should still be emitted (5 events)
|
||||
assert len(provenance_events) == 5
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_graph_rag_label_caching(self, graph_rag, mock_triples_client):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue