mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 17:06:22 +02:00
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding (#697)
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding, consistent PROV-O

GraphRAG:
- Split retrieval into 4 prompt stages: extract-concepts, kg-edge-scoring, kg-edge-reasoning, kg-synthesis (was single-stage)
- Add concept extraction (grounding) for per-concept embedding
- Filter main query to default graph, ignoring provenance/explainability edges
- Add source document edges to knowledge graph

DocumentRAG:
- Add grounding step with concept extraction, matching GraphRAG's pattern: Question → Grounding → Exploration → Synthesis
- Per-concept embedding and chunk retrieval with deduplication

Cross-pipeline:
- Make PROV-O derivation links consistent: wasGeneratedBy for first entity from Activity, wasDerivedFrom for entity-to-entity chains
- Update CLIs (tg-invoke-agent, tg-invoke-graph-rag, tg-invoke-document-rag) for new explainability structure
- Fix all affected unit and integration tests
This commit is contained in:
parent
29b4300808
commit
a115ec06ab
25 changed files with 1537 additions and 1008 deletions
|
|
@ -60,11 +60,12 @@ TG_SOURCE_CHAR_LENGTH = TG + "sourceCharLength"
|
|||
|
||||
# Query-time provenance predicates (GraphRAG)
|
||||
TG_QUERY = TG + "query"
|
||||
TG_CONCEPT = TG + "concept"
|
||||
TG_ENTITY = TG + "entity"
|
||||
TG_EDGE_COUNT = TG + "edgeCount"
|
||||
TG_SELECTED_EDGE = TG + "selectedEdge"
|
||||
TG_EDGE = TG + "edge"
|
||||
TG_REASONING = TG + "reasoning"
|
||||
TG_CONTENT = TG + "content"
|
||||
TG_DOCUMENT = TG + "document" # Reference to document in librarian
|
||||
|
||||
# Query-time provenance predicates (DocumentRAG)
|
||||
|
|
@ -79,27 +80,29 @@ TG_SUBGRAPH_TYPE = TG + "Subgraph"
|
|||
|
||||
# Explainability entity types (shared)
|
||||
TG_QUESTION = TG + "Question"
|
||||
TG_GROUNDING = TG + "Grounding"
|
||||
TG_EXPLORATION = TG + "Exploration"
|
||||
TG_FOCUS = TG + "Focus"
|
||||
TG_SYNTHESIS = TG + "Synthesis"
|
||||
TG_ANALYSIS = TG + "Analysis"
|
||||
TG_CONCLUSION = TG + "Conclusion"
|
||||
|
||||
# Unifying types for answer and intermediate commentary
|
||||
TG_ANSWER_TYPE = TG + "Answer" # Final answer (Synthesis, Conclusion)
|
||||
TG_REFLECTION_TYPE = TG + "Reflection" # Intermediate commentary (Thought, Observation)
|
||||
TG_THOUGHT_TYPE = TG + "Thought" # Agent reasoning
|
||||
TG_OBSERVATION_TYPE = TG + "Observation" # Agent tool result
|
||||
|
||||
# Question subtypes (to distinguish retrieval mechanism)
|
||||
TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"
|
||||
TG_DOC_RAG_QUESTION = TG + "DocRagQuestion"
|
||||
TG_AGENT_QUESTION = TG + "AgentQuestion"
|
||||
|
||||
# Agent provenance predicates
|
||||
TG_THOUGHT = TG + "thought"
|
||||
TG_THOUGHT = TG + "thought" # Links iteration to thought sub-entity
|
||||
TG_ACTION = TG + "action"
|
||||
TG_ARGUMENTS = TG + "arguments"
|
||||
TG_OBSERVATION = TG + "observation"
|
||||
TG_ANSWER = TG + "answer"
|
||||
|
||||
# Agent document references (for librarian storage)
|
||||
TG_THOUGHT_DOCUMENT = TG + "thoughtDocument"
|
||||
TG_OBSERVATION_DOCUMENT = TG + "observationDocument"
|
||||
TG_OBSERVATION = TG + "observation" # Links iteration to observation sub-entity
|
||||
|
||||
# Named graph URIs for RDF datasets
|
||||
# These separate different types of data while keeping them in the same collection
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue