mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding (#697)
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding, consistent PROV-O

GraphRAG:
- Split retrieval into 4 prompt stages: extract-concepts, kg-edge-scoring, kg-edge-reasoning, kg-synthesis (was single-stage)
- Add concept extraction (grounding) for per-concept embedding
- Filter main query to default graph, ignoring provenance/explainability edges
- Add source document edges to knowledge graph

DocumentRAG:
- Add grounding step with concept extraction, matching GraphRAG's pattern: Question → Grounding → Exploration → Synthesis
- Per-concept embedding and chunk retrieval with deduplication

Cross-pipeline:
- Make PROV-O derivation links consistent: wasGeneratedBy for first entity from Activity, wasDerivedFrom for entity-to-entity chains
- Update CLIs (tg-invoke-agent, tg-invoke-graph-rag, tg-invoke-document-rag) for new explainability structure
- Fix all affected unit and integration tests
This commit is contained in:
parent
29b4300808
commit
a115ec06ab
25 changed files with 1537 additions and 1008 deletions
|
|
@ -60,8 +60,12 @@ class TestGraphRagStreaming:
|
|||
full_text = "Machine learning is a subset of artificial intelligence that focuses on algorithms that learn from data."
|
||||
|
||||
async def prompt_side_effect(prompt_id, variables, streaming=False, chunk_callback=None, **kwargs):
|
||||
if prompt_id == "kg-edge-selection":
|
||||
# Edge selection returns JSONL with IDs - simulate selecting first edge
|
||||
if prompt_id == "extract-concepts":
|
||||
return "" # Falls back to raw query
|
||||
elif prompt_id == "kg-edge-scoring":
|
||||
# Edge scoring returns JSONL with IDs and scores
|
||||
return '{"id": "abc12345", "score": 0.9}\n'
|
||||
elif prompt_id == "kg-edge-reasoning":
|
||||
return '{"id": "abc12345", "reasoning": "Relevant to query"}\n'
|
||||
elif prompt_id == "kg-synthesis":
|
||||
if streaming and chunk_callback:
|
||||
|
|
@ -132,8 +136,8 @@ class TestGraphRagStreaming:
|
|||
# Verify content is reasonable
|
||||
assert "machine" in response.lower() or "learning" in response.lower()
|
||||
|
||||
# Verify provenance was emitted in real-time (4 events)
|
||||
assert len(provenance_events) == 4
|
||||
# Verify provenance was emitted in real-time (5 events: question, grounding, exploration, focus, synthesis)
|
||||
assert len(provenance_events) == 5
|
||||
for triples, prov_id in provenance_events:
|
||||
assert prov_id.startswith("urn:trustgraph:")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue